File indexing completed on 2024-05-12 15:34:35

0001 /*
0002     This file is part of the KContacts framework.
0003     SPDX-FileCopyrightText: 2003 Tobias Koenig <tokoe@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 #include "kcontacts_debug.h"
0009 #include "vcardparser_p.h"
0010 #include <KCodecs>
0011 #include <QTextCodec>
0012 #include <functional>
0013 
0014 // This cache for QString::fromLatin1() isn't about improving speed, but about reducing memory usage by sharing common strings
0015 class StringCache
0016 {
0017 public:
0018     QString fromLatin1(const QByteArray &value)
0019     {
0020         if (value.isEmpty()) {
0021             return QString();
0022         }
0023 
0024         auto it = m_values.constFind(value);
0025         if (it != m_values.constEnd()) {
0026             return it.value();
0027         }
0028 
0029         QString string = QString::fromLatin1(value);
0030         m_values.insert(value, string);
0031         return string;
0032     }
0033 
0034 private:
0035     QHash<QByteArray, QString> m_values;
0036 };
0037 
0038 using namespace KContacts;
0039 
0040 static void addEscapes(QByteArray &str, bool excludeEscapedComma)
0041 {
0042     str.replace('\\', "\\\\");
0043     if (!excludeEscapedComma) {
0044         str.replace(',', "\\,");
0045     }
0046     str.replace('\r', "\\r");
0047     str.replace('\n', "\\n");
0048 }
0049 
0050 static void removeEscapes(QByteArray &str)
0051 {
0052     // It's more likely that no escape is present, so add fast path
0053     if (!str.contains('\\')) {
0054         return;
0055     }
0056     str.replace("\\n", "\n");
0057     str.replace("\\N", "\n");
0058     str.replace("\\r", "\r");
0059     str.replace("\\,", ",");
0060     str.replace("\\\\", "\\");
0061 }
0062 
0063 class VCardLineParser
0064 {
0065 public:
0066     VCardLineParser(StringCache &cache, std::function<QByteArray()> fetchAnotherLine)
0067         : m_cache(cache)
0068         , m_fetchAnotherLine(fetchAnotherLine)
0069     {
0070     }
0071 
0072     void parseLine(const QByteArray &currentLine, VCardLine *vCardLine);
0073 
0074 private:
0075     void addParameter(const QByteArray &paramKey, const QByteArray &paramValue);
0076 
0077 private:
0078     StringCache &m_cache;
0079     std::function<QByteArray()> m_fetchAnotherLine;
0080 
0081     VCardLine *m_vCardLine = nullptr;
0082     QByteArray m_encoding;
0083     QByteArray m_charset;
0084 };
0085 
0086 void VCardLineParser::addParameter(const QByteArray &paramKey, const QByteArray &paramValue)
0087 {
0088     if (paramKey == "encoding") {
0089         m_encoding = paramValue.toLower();
0090     } else if (paramKey == "charset") {
0091         m_charset = paramValue.toLower();
0092     }
0093     // qDebug() << "  add parameter" << paramKey << "    =    " << paramValue;
0094     m_vCardLine->addParameter(m_cache.fromLatin1(paramKey), m_cache.fromLatin1(paramValue));
0095 }
0096 
/**
 * Parses one content line of the form
 *   [group.]identifier[;param[=value[,value...]]...]:value
 * and stores group, identifier, parameters and the decoded value in @p vCardLine.
 *
 * @param currentLine the raw line to parse
 * @param vCardLine   receives the result; the pointer is also kept in m_vCardLine
 *                    so that addParameter() can add parameters to it
 *
 * If no ':' terminating the key part is found, the function returns without
 * setting a value; whatever identifier, group and parameters were recognised
 * up to that point remain set on @p vCardLine.
 */
void VCardLineParser::parseLine(const QByteArray &currentLine, KContacts::VCardLine *vCardLine)
{
    // qDebug() << currentLine;
    m_vCardLine = vCardLine;
    // The syntax is key:value, but the key can contain semicolon-separated parameters, which can contain a ':', so indexOf(':') is wrong.
    // EXAMPLE: ADR;GEO="geo:22.500000,45.099998";LABEL="My Label";TYPE=home:P.O. Box 101;;;Any Town;CA;91921-1234;
    // Therefore we need a small state machine, just the way I like it.
    enum State {
        StateInitial, // reading [group.]identifier
        StateParamKey, // reading a parameter name (or a bare 2.1-style parameter)
        StateParamValue, // reading an unquoted parameter value
        StateQuotedValue, // inside a double-quoted parameter value
        StateAfterParamValue, // just after the closing quote of a quoted value
        StateValue, // the ':' was found; the loop stops as soon as this state is entered
    };
    State state = StateInitial;
    const int lineLength = currentLine.length();
    const char *lineData = currentLine.constData(); // to skip length checks from at() in debug mode
    QByteArray paramKey;
    QByteArray paramValue;
    int start = 0; // start of the token currently being collected
    int pos = 0; // current scan position; index of the ':' once StateValue is reached
    for (; pos < lineLength; ++pos) {
        const char ch = lineData[pos];
        const bool colonOrSemicolon = (ch == ';' || ch == ':');
        switch (state) {
        case StateInitial:
            if (colonOrSemicolon) {
                // end of the identifier
                const QByteArray identifier = currentLine.mid(start, pos - start);
                // qDebug() << " identifier" << identifier;
                vCardLine->setIdentifier(m_cache.fromLatin1(identifier));
                start = pos + 1;
            }
            if (ch == ';') {
                state = StateParamKey;
            } else if (ch == ':') {
                state = StateValue;
            } else if (ch == '.') {
                // everything up to the dot is the group name, the identifier follows
                vCardLine->setGroup(m_cache.fromLatin1(currentLine.mid(start, pos - start)));
                start = pos + 1;
            }
            break;
        case StateParamKey:
            if (colonOrSemicolon || ch == '=') {
                paramKey = currentLine.mid(start, pos - start);
                start = pos + 1;
            }
            if (colonOrSemicolon) {
                // correct the so-called 2.1 'standard'
                // A parameter without '=' is really a value: it is stored under "encoding"
                // if it names one of the two encodings checked below, otherwise under "type".
                paramValue = paramKey;
                const QByteArray lowerKey = paramKey.toLower();
                if (lowerKey == "quoted-printable" || lowerKey == "base64") {
                    paramKey = "encoding";
                } else {
                    paramKey = "type";
                }
                addParameter(paramKey, paramValue);
            }
            if (ch == ';') {
                state = StateParamKey;
            } else if (ch == ':') {
                state = StateValue;
            } else if (ch == '=') {
                state = StateParamValue;
            }
            break;
        case StateQuotedValue:
            // ':' and ';' are not special in this state; only the closing quote
            // (and, for "type", a comma) ends the current value.
            if (ch == '"' || (ch == ',' && paramKey.toLower() == "type")) {
                // TODO the hack above is for TEL;TYPE=\"voice,home\":... without breaking GEO.... TODO: check spec
                paramValue = currentLine.mid(start, pos - start);
                addParameter(paramKey.toLower(), paramValue);
                start = pos + 1;
                if (ch == '"') {
                    state = StateAfterParamValue; // to avoid duplicating code with StateParamValue, we use this intermediate state for one char
                }
            }
            break;
        case StateParamValue:
            if (colonOrSemicolon || ch == ',') {
                // a comma separates several values of the same parameter key
                paramValue = currentLine.mid(start, pos - start);
                addParameter(paramKey.toLower(), paramValue);
                start = pos + 1;
            }
            // fall-through intended
            Q_FALLTHROUGH();
        case StateAfterParamValue:
            if (ch == ';') {
                state = StateParamKey;
                start = pos + 1;
            } else if (ch == ':') {
                state = StateValue;
            } else if (pos == start && ch == '"') { // don't treat foo"bar" as quoted - TODO check the vCard 3.0 spec.
                state = StateQuotedValue;
                start = pos + 1;
            }
            break;
        case StateValue:
            // never reached: the loop is left right after switching to StateValue (see below)
            Q_UNREACHABLE();
            break;
        }

        if (state == StateValue) {
            // pos stays on the ':' so that the value can be extracted below
            break;
        }
    }

    if (state != StateValue) { // invalid line, no ':'
        return;
    }

    // Everything after the ':' is the value; backslash escapes are resolved before any decoding.
    QByteArray value = currentLine.mid(pos + 1);
    removeEscapes(value);

    QByteArray output;
    bool wasBase64Encoded = false;

    if (!m_encoding.isEmpty()) {
        // have to decode the data
        if (m_encoding == "b" || m_encoding == "base64") {
            output = QByteArray::fromBase64(value);
            wasBase64Encoded = true;
        } else if (m_encoding == "quoted-printable") {
            // join any qp-folded lines
            // (a trailing '=' marks a soft line break; the continuation is obtained through the callback)
            while (value.endsWith('=')) {
                value.chop(1); // remove the '='
                value.append(m_fetchAnotherLine());
            }
            KCodecs::quotedPrintableDecode(value, output);
        } else if (m_encoding == "8bit") {
            output = value;
        } else {
            // unknown encoding: output stays empty
            qDebug("Unknown vcard encoding type!");
        }
    } else {
        output = value;
    }

    if (!m_charset.isEmpty()) {
        // have to convert the data
        QTextCodec *codec = QTextCodec::codecForName(m_charset);
        if (codec) {
            vCardLine->setValue(codec->toUnicode(output));
        } else {
            // no codec available for this charset name: fall back to UTF-8
            vCardLine->setValue(QString::fromUtf8(output));
        }
    } else if (wasBase64Encoded) {
        // decoded base64 data is stored as the raw QByteArray, not converted to a string
        vCardLine->setValue(output);
    } else {
        vCardLine->setValue(QString::fromUtf8(output));
    }
}
0248 
0249 ////
0250 
0251 VCardParser::VCardParser()
0252 {
0253 }
0254 
0255 VCardParser::~VCardParser()
0256 {
0257 }
0258 
0259 VCard::List VCardParser::parseVCards(const QByteArray &text)
0260 {
0261     VCard currentVCard;
0262     VCard::List vCardList;
0263     QByteArray currentLine;
0264 
0265     int lineStart = 0;
0266     int lineEnd = text.indexOf('\n');
0267 
0268     bool inVCard = false;
0269 
0270     StringCache cache;
0271     for (; lineStart != text.size() + 1;
0272          lineStart = lineEnd + 1, lineEnd = (text.indexOf('\n', lineStart) == -1) ? text.size() : text.indexOf('\n', lineStart)) {
0273         QByteArray cur = text.mid(lineStart, lineEnd - lineStart);
0274         // remove the trailing \r, left from \r\n
0275         if (cur.endsWith('\r')) {
0276             cur.chop(1);
0277         }
0278 
0279         if (cur.startsWith(' ') //
0280             || cur.startsWith('\t')) { // folded line => append to previous
0281             currentLine.append(cur.mid(1));
0282             continue;
0283         } else {
0284             if (cur.trimmed().isEmpty()) { // empty line
0285                 continue;
0286             }
0287             if (inVCard && !currentLine.isEmpty()) { // now parse the line
0288                 VCardLine vCardLine;
0289 
0290                 // Provide a way for the parseVCardLine function to read more lines (for quoted-printable support)
0291                 auto fetchAnotherLine = [&text, &lineStart, &lineEnd, &cur]() -> QByteArray {
0292                     const QByteArray ret = cur;
0293                     lineStart = lineEnd + 1;
0294                     lineEnd = text.indexOf('\n', lineStart);
0295                     if (lineEnd != -1) {
0296                         cur = text.mid(lineStart, lineEnd - lineStart);
0297                         // remove the trailing \r, left from \r\n
0298                         if (cur.endsWith('\r')) {
0299                             cur.chop(1);
0300                         }
0301                     }
0302                     return ret;
0303                 };
0304 
0305                 VCardLineParser lineParser(cache, fetchAnotherLine);
0306 
0307                 lineParser.parseLine(currentLine, &vCardLine);
0308 
0309                 currentVCard.addLine(vCardLine);
0310             }
0311 
0312             // we do not save the start and end tag as vcardline
0313             if (qstrnicmp(cur.constData(), "begin:vcard", 11) == 0) {
0314                 inVCard = true;
0315                 currentLine.clear();
0316                 currentVCard.clear(); // flush vcard
0317                 continue;
0318             }
0319 
0320             if (qstrnicmp(cur.constData(), "end:vcard", 9) == 0) {
0321                 inVCard = false;
0322                 vCardList.append(currentVCard);
0323                 currentLine.clear();
0324                 currentVCard.clear(); // flush vcard
0325                 continue;
0326             }
0327 
0328             currentLine = cur;
0329         }
0330     }
0331 
0332     return vCardList;
0333 }
0334 
// Length threshold, in bytes, above which createVCards() folds an output line.
static constexpr int FOLD_WIDTH = 75;
0336 
0337 QByteArray VCardParser::createVCards(const VCard::List &list)
0338 {
0339     QByteArray text;
0340     QByteArray textLine;
0341     QString encodingType;
0342     QStringList params;
0343     QStringList values;
0344 
0345     VCardLine::List lines;
0346 
0347     bool hasEncoding;
0348 
0349     text.reserve(list.size() * 300); // reserve memory to be more efficient
0350 
0351     // iterate over the cards
0352     for (const VCard &card : list) {
0353         text.append("BEGIN:VCARD\r\n");
0354 
0355         QStringList idents = card.identifiers();
0356         // VERSION must be first
0357         if (idents.contains(QLatin1String("VERSION"))) {
0358             const QString str = idents.takeAt(idents.indexOf(QLatin1String("VERSION")));
0359             idents.prepend(str);
0360         }
0361 
0362         for (const auto &id : std::as_const(idents)) {
0363             lines = card.lines(id);
0364 
0365             // iterate over the lines
0366             for (const VCardLine &vline : std::as_const(lines)) {
0367                 QVariant val = vline.value();
0368                 if (val.isValid()) {
0369                     if (vline.hasGroup()) {
0370                         textLine = vline.group().toLatin1() + '.' + vline.identifier().toLatin1();
0371                     } else {
0372                         textLine = vline.identifier().toLatin1();
0373                     }
0374 
0375                     params = vline.parameterList();
0376                     hasEncoding = false;
0377                     if (!params.isEmpty()) { // we have parameters
0378                         for (const QString &param : std::as_const(params)) {
0379                             if (param == QLatin1String("encoding")) {
0380                                 hasEncoding = true;
0381                                 encodingType = vline.parameter(QStringLiteral("encoding")).toLower();
0382                             }
0383 
0384                             values = vline.parameters(param);
0385                             for (const QString &str : std::as_const(values)) {
0386                                 textLine.append(';' + param.toLatin1().toUpper());
0387                                 if (!str.isEmpty()) {
0388                                     textLine.append('=' + str.toLatin1());
0389                                 }
0390                             }
0391                         }
0392                     }
0393 
0394                     QByteArray input;
0395                     QByteArray output;
0396                     bool checkMultibyte = false; // avoid splitting a multibyte character
0397 
0398                     // handle charset
0399                     const QString charset = vline.parameter(QStringLiteral("charset"));
0400                     if (!charset.isEmpty()) {
0401                         // have to convert the data
0402                         const QString value = vline.value().toString();
0403                         QTextCodec *codec = QTextCodec::codecForName(charset.toLatin1());
0404                         if (codec) {
0405                             input = codec->fromUnicode(value);
0406                         } else {
0407                             checkMultibyte = true;
0408                             input = value.toUtf8();
0409                         }
0410                     } else if (vline.value().type() == QVariant::ByteArray) {
0411                         input = vline.value().toByteArray();
0412                     } else {
0413                         checkMultibyte = true;
0414                         input = vline.value().toString().toUtf8();
0415                     }
0416 
0417                     // handle encoding
0418                     if (hasEncoding) { // have to encode the data
0419                         if (encodingType == QLatin1Char('b')) {
0420                             checkMultibyte = false;
0421                             output = input.toBase64();
0422                         } else if (encodingType == QLatin1String("quoted-printable")) {
0423                             checkMultibyte = false;
0424                             KCodecs::quotedPrintableEncode(input, output, false);
0425                         }
0426                     } else {
0427                         output = input;
0428                     }
0429                     addEscapes(output, (vline.identifier() == QLatin1String("CATEGORIES") || vline.identifier() == QLatin1String("GEO")));
0430 
0431                     if (!output.isEmpty()) {
0432                         textLine.append(':' + output);
0433 
0434                         if (textLine.length() > FOLD_WIDTH) { // we have to fold the line
0435                             if (checkMultibyte) {
0436                                 // RFC 6350: Multi-octet characters MUST remain contiguous.
0437                                 // we know that textLine contains UTF-8 encoded characters
0438                                 int lineLength = 0;
0439                                 for (int i = 0; i < textLine.length(); ++i) {
0440                                     if ((textLine[i] & 0xC0) == 0xC0) { // a multibyte sequence follows
0441                                         int sequenceLength = 2;
0442                                         if ((textLine[i] & 0xE0) == 0xE0) {
0443                                             sequenceLength = 3;
0444                                         } else if ((textLine[i] & 0xF0) == 0xF0) {
0445                                             sequenceLength = 4;
0446                                         }
0447                                         if ((lineLength + sequenceLength) > FOLD_WIDTH) {
0448                                             // the current line would be too long. fold it
0449                                             text += "\r\n " + textLine.mid(i, sequenceLength);
0450                                             lineLength = 1 + sequenceLength; // incl. leading space
0451                                         } else {
0452                                             text += textLine.mid(i, sequenceLength);
0453                                             lineLength += sequenceLength;
0454                                         }
0455                                         i += sequenceLength - 1;
0456                                     } else {
0457                                         text += textLine[i];
0458                                         ++lineLength;
0459                                     }
0460                                     if ((lineLength == FOLD_WIDTH) && (i < (textLine.length() - 1))) {
0461                                         text += "\r\n ";
0462                                         lineLength = 1; // leading space
0463                                     }
0464                                 }
0465                                 text += "\r\n";
0466                             } else {
0467                                 for (int i = 0; i <= (textLine.length() / FOLD_WIDTH); ++i) {
0468                                     text.append((i == 0 ? "" : " ") + textLine.mid(i * FOLD_WIDTH, FOLD_WIDTH) + "\r\n");
0469                                 }
0470                             }
0471                         } else {
0472                             text.append(textLine);
0473                             text.append("\r\n");
0474                         }
0475                     }
0476                 }
0477             }
0478         }
0479 
0480         text.append("END:VCARD\r\n");
0481         text.append("\r\n");
0482     }
0483 
0484     return text;
0485 }