File indexing completed on 2024-04-28 03:53:33

0001 /*
0002     This file is part of the KContacts framework.
0003     SPDX-FileCopyrightText: 2003 Tobias Koenig <tokoe@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 #include "kcontacts_debug.h"
0009 #include "vcardparser_p.h"
0010 #include <KCodecs>
0011 #include <QStringDecoder>
0012 #include <QStringEncoder>
0013 #include <functional>
0014 
0015 // This cache for QString::fromLatin1() isn't about improving speed, but about reducing memory usage by sharing common strings
0016 class StringCache
0017 {
0018 public:
0019     QString fromLatin1(const QByteArray &value)
0020     {
0021         if (value.isEmpty()) {
0022             return QString();
0023         }
0024 
0025         auto it = m_values.constFind(value);
0026         if (it != m_values.constEnd()) {
0027             return it.value();
0028         }
0029 
0030         QString string = QString::fromLatin1(value);
0031         m_values.insert(value, string);
0032         return string;
0033     }
0034 
0035 private:
0036     QHash<QByteArray, QString> m_values;
0037 };
0038 
0039 using namespace KContacts;
0040 
0041 static void addEscapes(QByteArray &str, bool excludeEscapedComma)
0042 {
0043     str.replace('\\', "\\\\");
0044     if (!excludeEscapedComma) {
0045         str.replace(',', "\\,");
0046     }
0047     str.replace('\r', "\\r");
0048     str.replace('\n', "\\n");
0049 }
0050 
0051 static void removeEscapes(QByteArray &str)
0052 {
0053     // It's more likely that no escape is present, so add fast path
0054     if (!str.contains('\\')) {
0055         return;
0056     }
0057     str.replace("\\n", "\n");
0058     str.replace("\\N", "\n");
0059     str.replace("\\r", "\r");
0060     str.replace("\\,", ",");
0061     str.replace("\\\\", "\\");
0062 }
0063 
0064 class VCardLineParser
0065 {
0066 public:
0067     VCardLineParser(StringCache &cache, std::function<QByteArray()> fetchAnotherLine)
0068         : m_cache(cache)
0069         , m_fetchAnotherLine(fetchAnotherLine)
0070     {
0071     }
0072 
0073     void parseLine(const QByteArray &currentLine, VCardLine *vCardLine);
0074 
0075 private:
0076     void addParameter(const QByteArray &paramKey, const QByteArray &paramValue);
0077 
0078 private:
0079     StringCache &m_cache;
0080     std::function<QByteArray()> m_fetchAnotherLine;
0081 
0082     VCardLine *m_vCardLine = nullptr;
0083     QByteArray m_encoding;
0084     QByteArray m_charset;
0085 };
0086 
0087 void VCardLineParser::addParameter(const QByteArray &paramKey, const QByteArray &paramValue)
0088 {
0089     if (paramKey == "encoding") {
0090         m_encoding = paramValue.toLower();
0091     } else if (paramKey == "charset") {
0092         m_charset = paramValue.toLower();
0093     }
0094     // qDebug() << "  add parameter" << paramKey << "    =    " << paramValue;
0095     m_vCardLine->addParameter(m_cache.fromLatin1(paramKey), m_cache.fromLatin1(paramValue));
0096 }
0097 
0098 void VCardLineParser::parseLine(const QByteArray &currentLine, KContacts::VCardLine *vCardLine)
0099 {
0100     // qDebug() << currentLine;
0101     m_vCardLine = vCardLine;
0102     // The syntax is key:value, but the key can contain semicolon-separated parameters, which can contain a ':', so indexOf(':') is wrong.
0103     // EXAMPLE: ADR;GEO="geo:22.500000,45.099998";LABEL="My Label";TYPE=home:P.O. Box 101;;;Any Town;CA;91921-1234;
0104     // Therefore we need a small state machine, just the way I like it.
0105     enum State {
0106         StateInitial,
0107         StateParamKey,
0108         StateParamValue,
0109         StateQuotedValue,
0110         StateAfterParamValue,
0111         StateValue,
0112     };
0113     State state = StateInitial;
0114     const int lineLength = currentLine.length();
0115     const char *lineData = currentLine.constData(); // to skip length checks from at() in debug mode
0116     QByteArray paramKey;
0117     QByteArray paramValue;
0118     int start = 0;
0119     int pos = 0;
0120     for (; pos < lineLength; ++pos) {
0121         const char ch = lineData[pos];
0122         const bool colonOrSemicolon = (ch == ';' || ch == ':');
0123         switch (state) {
0124         case StateInitial:
0125             if (colonOrSemicolon) {
0126                 const QByteArray identifier = currentLine.mid(start, pos - start);
0127                 // qDebug() << " identifier" << identifier;
0128                 vCardLine->setIdentifier(m_cache.fromLatin1(identifier));
0129                 start = pos + 1;
0130             }
0131             if (ch == ';') {
0132                 state = StateParamKey;
0133             } else if (ch == ':') {
0134                 state = StateValue;
0135             } else if (ch == '.') {
0136                 vCardLine->setGroup(m_cache.fromLatin1(currentLine.mid(start, pos - start)));
0137                 start = pos + 1;
0138             }
0139             break;
0140         case StateParamKey:
0141             if (colonOrSemicolon || ch == '=') {
0142                 paramKey = currentLine.mid(start, pos - start);
0143                 start = pos + 1;
0144             }
0145             if (colonOrSemicolon) {
0146                 // correct the so-called 2.1 'standard'
0147                 paramValue = paramKey;
0148                 const QByteArray lowerKey = paramKey.toLower();
0149                 if (lowerKey == "quoted-printable" || lowerKey == "base64") {
0150                     paramKey = "encoding";
0151                 } else {
0152                     paramKey = "type";
0153                 }
0154                 addParameter(paramKey, paramValue);
0155             }
0156             if (ch == ';') {
0157                 state = StateParamKey;
0158             } else if (ch == ':') {
0159                 state = StateValue;
0160             } else if (ch == '=') {
0161                 state = StateParamValue;
0162             }
0163             break;
0164         case StateQuotedValue:
0165             if (ch == '"' || (ch == ',' && paramKey.toLower() == "type")) {
0166                 // TODO the hack above is for TEL;TYPE=\"voice,home\":... without breaking GEO.... TODO: check spec
0167                 paramValue = currentLine.mid(start, pos - start);
0168                 addParameter(paramKey.toLower(), paramValue);
0169                 start = pos + 1;
0170                 if (ch == '"') {
0171                     state = StateAfterParamValue; // to avoid duplicating code with StateParamValue, we use this intermediate state for one char
0172                 }
0173             }
0174             break;
0175         case StateParamValue:
0176             if (colonOrSemicolon || ch == ',') {
0177                 paramValue = currentLine.mid(start, pos - start);
0178                 addParameter(paramKey.toLower(), paramValue);
0179                 start = pos + 1;
0180             }
0181             // fall-through intended
0182             Q_FALLTHROUGH();
0183         case StateAfterParamValue:
0184             if (ch == ';') {
0185                 state = StateParamKey;
0186                 start = pos + 1;
0187             } else if (ch == ':') {
0188                 state = StateValue;
0189             } else if (pos == start && ch == '"') { // don't treat foo"bar" as quoted - TODO check the vCard 3.0 spec.
0190                 state = StateQuotedValue;
0191                 start = pos + 1;
0192             }
0193             break;
0194         case StateValue:
0195             Q_UNREACHABLE();
0196             break;
0197         }
0198 
0199         if (state == StateValue) {
0200             break;
0201         }
0202     }
0203 
0204     if (state != StateValue) { // invalid line, no ':'
0205         return;
0206     }
0207 
0208     QByteArray value = currentLine.mid(pos + 1);
0209     removeEscapes(value);
0210 
0211     QByteArray output;
0212     bool wasBase64Encoded = false;
0213 
0214     if (!m_encoding.isEmpty()) {
0215         // have to decode the data
0216         if (m_encoding == "b" || m_encoding == "base64") {
0217             output = QByteArray::fromBase64(value);
0218             wasBase64Encoded = true;
0219         } else if (m_encoding == "quoted-printable") {
0220             // join any qp-folded lines
0221             while (value.endsWith('=')) {
0222                 value.chop(1); // remove the '='
0223                 value.append(m_fetchAnotherLine());
0224             }
0225             KCodecs::quotedPrintableDecode(value, output);
0226         } else if (m_encoding == "8bit") {
0227             output = value;
0228         } else {
0229             qDebug("Unknown vcard encoding type!");
0230         }
0231     } else {
0232         output = value;
0233     }
0234 
0235     if (!m_charset.isEmpty()) {
0236         // have to convert the data
0237         auto codec = QStringDecoder(m_charset.constData());
0238         if (codec.isValid()) {
0239             vCardLine->setValue(QVariant::fromValue<QString>(codec.decode(output)));
0240         } else {
0241             vCardLine->setValue(QString::fromUtf8(output));
0242         }
0243     } else if (wasBase64Encoded) {
0244         vCardLine->setValue(output);
0245     } else {
0246         vCardLine->setValue(QString::fromUtf8(output));
0247     }
0248 }
0249 
0250 ////
0251 
0252 VCardParser::VCardParser()
0253 {
0254 }
0255 
0256 VCardParser::~VCardParser()
0257 {
0258 }
0259 
0260 VCard::List VCardParser::parseVCards(const QByteArray &text)
0261 {
0262     VCard currentVCard;
0263     VCard::List vCardList;
0264     QByteArray currentLine;
0265 
0266     int lineStart = 0;
0267     int lineEnd = text.indexOf('\n');
0268 
0269     bool inVCard = false;
0270 
0271     StringCache cache;
0272     for (; lineStart != text.size() + 1;
0273          lineStart = lineEnd + 1, lineEnd = (text.indexOf('\n', lineStart) == -1) ? text.size() : text.indexOf('\n', lineStart)) {
0274         QByteArray cur = text.mid(lineStart, lineEnd - lineStart);
0275         // remove the trailing \r, left from \r\n
0276         if (cur.endsWith('\r')) {
0277             cur.chop(1);
0278         }
0279 
0280         if (cur.startsWith(' ') //
0281             || cur.startsWith('\t')) { // folded line => append to previous
0282             currentLine.append(cur.mid(1));
0283             continue;
0284         } else {
0285             if (cur.trimmed().isEmpty()) { // empty line
0286                 continue;
0287             }
0288             if (inVCard && !currentLine.isEmpty()) { // now parse the line
0289                 VCardLine vCardLine;
0290 
0291                 // Provide a way for the parseVCardLine function to read more lines (for quoted-printable support)
0292                 auto fetchAnotherLine = [&text, &lineStart, &lineEnd, &cur]() -> QByteArray {
0293                     const QByteArray ret = cur;
0294                     lineStart = lineEnd + 1;
0295                     lineEnd = text.indexOf('\n', lineStart);
0296                     if (lineEnd != -1) {
0297                         cur = text.mid(lineStart, lineEnd - lineStart);
0298                         // remove the trailing \r, left from \r\n
0299                         if (cur.endsWith('\r')) {
0300                             cur.chop(1);
0301                         }
0302                     }
0303                     return ret;
0304                 };
0305 
0306                 VCardLineParser lineParser(cache, fetchAnotherLine);
0307 
0308                 lineParser.parseLine(currentLine, &vCardLine);
0309 
0310                 currentVCard.addLine(vCardLine);
0311             }
0312 
0313             // we do not save the start and end tag as vcardline
0314             if (qstrnicmp(cur.constData(), "begin:vcard", 11) == 0) {
0315                 inVCard = true;
0316                 currentLine.clear();
0317                 currentVCard.clear(); // flush vcard
0318                 continue;
0319             }
0320 
0321             if (qstrnicmp(cur.constData(), "end:vcard", 9) == 0) {
0322                 inVCard = false;
0323                 vCardList.append(currentVCard);
0324                 currentLine.clear();
0325                 currentVCard.clear(); // flush vcard
0326                 continue;
0327             }
0328 
0329             currentLine = cur;
0330         }
0331     }
0332 
0333     return vCardList;
0334 }
0335 
// Lines longer than this many bytes are folded when vCards are written.
static constexpr int FOLD_WIDTH = 75;
0337 
0338 QByteArray VCardParser::createVCards(const VCard::List &list)
0339 {
0340     QByteArray text;
0341     QByteArray textLine;
0342     QString encodingType;
0343     QStringList params;
0344     QStringList values;
0345 
0346     VCardLine::List lines;
0347 
0348     bool hasEncoding;
0349 
0350     text.reserve(list.size() * 300); // reserve memory to be more efficient
0351 
0352     // iterate over the cards
0353     for (const VCard &card : list) {
0354         text.append("BEGIN:VCARD\r\n");
0355 
0356         QStringList idents = card.identifiers();
0357         // VERSION must be first
0358         if (idents.contains(QLatin1String("VERSION"))) {
0359             const QString str = idents.takeAt(idents.indexOf(QLatin1String("VERSION")));
0360             idents.prepend(str);
0361         }
0362 
0363         for (const auto &id : std::as_const(idents)) {
0364             lines = card.lines(id);
0365 
0366             // iterate over the lines
0367             for (const VCardLine &vline : std::as_const(lines)) {
0368                 QVariant val = vline.value();
0369                 if (val.isValid()) {
0370                     if (vline.hasGroup()) {
0371                         textLine = vline.group().toLatin1() + '.' + vline.identifier().toLatin1();
0372                     } else {
0373                         textLine = vline.identifier().toLatin1();
0374                     }
0375 
0376                     params = vline.parameterList();
0377                     hasEncoding = false;
0378                     if (!params.isEmpty()) { // we have parameters
0379                         for (const QString &param : std::as_const(params)) {
0380                             if (param == QLatin1String("encoding")) {
0381                                 hasEncoding = true;
0382                                 encodingType = vline.parameter(QStringLiteral("encoding")).toLower();
0383                             }
0384 
0385                             values = vline.parameters(param);
0386                             for (const QString &str : std::as_const(values)) {
0387                                 textLine.append(';' + param.toLatin1().toUpper());
0388                                 if (!str.isEmpty()) {
0389                                     textLine.append('=' + str.toLatin1());
0390                                 }
0391                             }
0392                         }
0393                     }
0394 
0395                     QByteArray input;
0396                     QByteArray output;
0397                     bool checkMultibyte = false; // avoid splitting a multibyte character
0398 
0399                     // handle charset
0400                     const QString charset = vline.parameter(QStringLiteral("charset"));
0401                     if (!charset.isEmpty()) {
0402                         // have to convert the data
0403                         const QString value = vline.value().toString();
0404                         auto codec = QStringEncoder(charset.toLatin1().constData());
0405                         if (codec.isValid()) {
0406                             input = codec.encode(value);
0407                         } else {
0408                             checkMultibyte = true;
0409                             input = value.toUtf8();
0410                         }
0411                     } else if (vline.value().userType() == QMetaType::QByteArray) {
0412                         input = vline.value().toByteArray();
0413                     } else {
0414                         checkMultibyte = true;
0415                         input = vline.value().toString().toUtf8();
0416                     }
0417 
0418                     // handle encoding
0419                     if (hasEncoding) { // have to encode the data
0420                         if (encodingType == QLatin1Char('b')) {
0421                             checkMultibyte = false;
0422                             output = input.toBase64();
0423                         } else if (encodingType == QLatin1String("quoted-printable")) {
0424                             checkMultibyte = false;
0425                             KCodecs::quotedPrintableEncode(input, output, false);
0426                         }
0427                     } else {
0428                         output = input;
0429                     }
0430                     addEscapes(output, (vline.identifier() == QLatin1String("CATEGORIES") || vline.identifier() == QLatin1String("GEO")));
0431 
0432                     if (!output.isEmpty()) {
0433                         textLine.append(':' + output);
0434 
0435                         if (textLine.length() > FOLD_WIDTH) { // we have to fold the line
0436                             if (checkMultibyte) {
0437                                 // RFC 6350: Multi-octet characters MUST remain contiguous.
0438                                 // we know that textLine contains UTF-8 encoded characters
0439                                 int lineLength = 0;
0440                                 for (int i = 0; i < textLine.length(); ++i) {
0441                                     if ((textLine[i] & 0xC0) == 0xC0) { // a multibyte sequence follows
0442                                         int sequenceLength = 2;
0443                                         if ((textLine[i] & 0xE0) == 0xE0) {
0444                                             sequenceLength = 3;
0445                                         } else if ((textLine[i] & 0xF0) == 0xF0) {
0446                                             sequenceLength = 4;
0447                                         }
0448                                         if ((lineLength + sequenceLength) > FOLD_WIDTH) {
0449                                             // the current line would be too long. fold it
0450                                             text += "\r\n " + textLine.mid(i, sequenceLength);
0451                                             lineLength = 1 + sequenceLength; // incl. leading space
0452                                         } else {
0453                                             text += textLine.mid(i, sequenceLength);
0454                                             lineLength += sequenceLength;
0455                                         }
0456                                         i += sequenceLength - 1;
0457                                     } else {
0458                                         text += textLine[i];
0459                                         ++lineLength;
0460                                     }
0461                                     if ((lineLength == FOLD_WIDTH) && (i < (textLine.length() - 1))) {
0462                                         text += "\r\n ";
0463                                         lineLength = 1; // leading space
0464                                     }
0465                                 }
0466                                 text += "\r\n";
0467                             } else {
0468                                 for (int i = 0; i <= (textLine.length() / FOLD_WIDTH); ++i) {
0469                                     text.append((i == 0 ? "" : " ") + textLine.mid(i * FOLD_WIDTH, FOLD_WIDTH) + "\r\n");
0470                                 }
0471                             }
0472                         } else {
0473                             text.append(textLine);
0474                             text.append("\r\n");
0475                         }
0476                     }
0477                 }
0478             }
0479         }
0480 
0481         text.append("END:VCARD\r\n");
0482         text.append("\r\n");
0483     }
0484 
0485     return text;
0486 }