File indexing completed on 2024-05-19 05:17:43
0001 /********************************************************************** 0002 * 0003 * rfccodecs.cpp - handler for various rfc/mime encodings 0004 * Copyright (C) 2000 s.carstens@gmx.de 0005 * 0006 * This library is free software; you can redistribute it and/or 0007 * modify it under the terms of the GNU Library General Public 0008 * License as published by the Free Software Foundation; either 0009 * version 2 of the License, or (at your option) any later version. 0010 * 0011 * This library is distributed in the hope that it will be useful, 0012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0014 * Library General Public License for more details. 0015 * 0016 * You should have received a copy of the GNU Library General Public License 0017 * along with this library; see the file COPYING.LIB. If not, write to 0018 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 0019 * Boston, MA 02110-1301, USA. 0020 * 0021 *********************************************************************/ 0022 /** 0023 * @file 0024 * This file is part of the IMAP support library and defines the 0025 * RfcCodecs class. 0026 * 0027 * @brief 0028 * Defines the RfcCodecs class. 0029 * 0030 * @author Sven Carstens 0031 */ 0032 0033 #include "rfccodecs.h" 0034 0035 #include <ctype.h> 0036 #include <sys/types.h> 0037 0038 #include <stdio.h> 0039 #include <stdlib.h> 0040 0041 #include <QtCore/QTextCodec> 0042 #include <QtCore/QBuffer> 0043 #include <QtCore/QByteArray> 0044 #include <QtCore/QLatin1Char> 0045 #include <kcodecs.h> 0046 0047 using namespace KIMAP2; 0048 0049 // This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997. 0050 // adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000 0051 0052 //@cond PRIVATE 0053 static const unsigned char base64chars[] = 0054 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; 0055 #define UNDEFINED 64 0056 #define MAXLINE 76 0057 static const char especials[17] = "()<>@,;:\"/[]?.= "; 0058 0059 /* UTF16 definitions */ 0060 #define UTF16MASK 0x03FFUL 0061 #define UTF16SHIFT 10 0062 #define UTF16BASE 0x10000UL 0063 #define UTF16HIGHSTART 0xD800UL 0064 #define UTF16HIGHEND 0xDBFFUL 0065 #define UTF16LOSTART 0xDC00UL 0066 #define UTF16LOEND 0xDFFFUL 0067 //@endcond 0068 0069 //----------------------------------------------------------------------------- 0070 QByteArray KIMAP2::decodeImapFolderName(const QByteArray &inSrc) 0071 { 0072 unsigned char c, i, bitcount; 0073 unsigned long ucs4, utf16, bitbuf; 0074 unsigned char base64[256], utf8[6]; 0075 unsigned int srcPtr = 0; 0076 QByteArray dst; 0077 QByteArray src = inSrc; 0078 uint srcLen = inSrc.length(); 0079 0080 /* initialize modified base64 decoding table */ 0081 memset(base64, UNDEFINED, sizeof(base64)); 0082 for (i = 0; i < sizeof(base64chars); ++i) { 0083 base64[(int)base64chars[i]] = i; 0084 } 0085 0086 /* loop until end of string */ 0087 while (srcPtr < srcLen) { 0088 c = src[srcPtr++]; 0089 /* deal with literal characters and &- */ 0090 if (c != '&' || src[srcPtr] == '-') { 0091 /* encode literally */ 0092 dst += c; 0093 /* skip over the '-' if this is an &- sequence */ 0094 if (c == '&') { 0095 srcPtr++; 0096 } 0097 } else { 0098 /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */ 0099 bitbuf = 0; 0100 bitcount = 0; 0101 ucs4 = 0; 0102 while ((c = base64[(unsigned char)src[srcPtr]]) != UNDEFINED) { 0103 ++srcPtr; 0104 bitbuf = (bitbuf << 6) | c; 0105 bitcount += 6; 0106 /* enough bits for a UTF-16 character? */ 0107 if (bitcount >= 16) { 0108 bitcount -= 16; 0109 utf16 = (bitcount ? bitbuf >> bitcount : bitbuf) & 0xffff; 0110 /* convert UTF16 to UCS4 */ 0111 if (utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND) { 0112 ucs4 = (utf16 - UTF16HIGHSTART) << UTF16SHIFT; 0113 continue; 0114 } else if (utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND) { 0115 ucs4 += utf16 - UTF16LOSTART + UTF16BASE; 0116 } else { 0117 ucs4 = utf16; 0118 } 0119 /* convert UTF-16 range of UCS4 to UTF-8 */ 0120 if (ucs4 <= 0x7fUL) { 0121 utf8[0] = ucs4; 0122 i = 1; 0123 } else if (ucs4 <= 0x7ffUL) { 0124 utf8[0] = 0xc0 | (ucs4 >> 6); 0125 utf8[1] = 0x80 | (ucs4 & 0x3f); 0126 i = 2; 0127 } else if (ucs4 <= 0xffffUL) { 0128 utf8[0] = 0xe0 | (ucs4 >> 12); 0129 utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f); 0130 utf8[2] = 0x80 | (ucs4 & 0x3f); 0131 i = 3; 0132 } else { 0133 utf8[0] = 0xf0 | (ucs4 >> 18); 0134 utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f); 0135 utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f); 0136 utf8[3] = 0x80 | (ucs4 & 0x3f); 0137 i = 4; 0138 } 0139 /* copy it */ 0140 for (c = 0; c < i; ++c) { 0141 dst += utf8[c]; 0142 } 0143 } 0144 } 0145 /* skip over trailing '-' in modified UTF-7 encoding */ 0146 if (src[srcPtr] == '-') { 0147 ++srcPtr; 0148 } 0149 } 0150 } 0151 return dst; 0152 } 0153 0154 QString KIMAP2::decodeImapFolderName(const QString &inSrc) 0155 { 0156 return QString::fromUtf8(decodeImapFolderName(inSrc.toUtf8()).constData()); 0157 } 0158 0159 //----------------------------------------------------------------------------- 0160 0161 QByteArray KIMAP2::quoteIMAP(const QByteArray &src) 0162 { 0163 uint len = src.length(); 0164 QByteArray result; 0165 result.reserve(2 * len); 0166 for (unsigned int i = 0; i < len; i++) { 0167 if (src[i] == '"' || src[i] == '\\') { 0168 result += '\\'; 0169 } 0170 result += src[i]; 0171 } 0172 result.squeeze(); 0173 return result; 0174 } 0175 0176 QString KIMAP2::quoteIMAP(const QString &src) 0177 { 0178 uint len = src.length(); 0179 QString result; 0180 result.reserve(2 * len); 0181 for (unsigned int i = 0; i < len; i++) { 0182 if (src[i] == QLatin1Char('"') || src[i] == QLatin1Char('\\')) { 0183 result += QLatin1Char('\\'); 0184 } 0185 result += src[i]; 0186 } 0187 //result.squeeze(); - unnecessary and slow 0188 return result; 0189 } 0190 0191 //----------------------------------------------------------------------------- 0192 QString KIMAP2::encodeImapFolderName(const QString &inSrc) 0193 { 0194 return QString::fromUtf8(encodeImapFolderName(inSrc.toUtf8()).constData()); 0195 } 0196 0197 QByteArray KIMAP2::encodeImapFolderName(const QByteArray &inSrc) 0198 { 0199 unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag; 0200 unsigned int ucs4, bitbuf; 0201 QByteArray src = inSrc; 0202 QByteArray dst; 0203 0204 int srcPtr = 0; 0205 utf7mode = 0; 0206 utf8total = 0; 0207 bitstogo = 0; 0208 utf8pos = 0; 0209 bitbuf = 0; 0210 ucs4 = 0; 0211 while (srcPtr < src.length()) { 0212 c = (unsigned char)src[srcPtr++]; 0213 /* normal character? */ 0214 if (c >= ' ' && c <= '~') { 0215 /* switch out of UTF-7 mode */ 0216 if (utf7mode) { 0217 if (bitstogo) { 0218 dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F]; 0219 bitstogo = 0; 0220 } 0221 dst += '-'; 0222 utf7mode = 0; 0223 } 0224 dst += c; 0225 /* encode '&' as '&-' */ 0226 if (c == '&') { 0227 dst += '-'; 0228 } 0229 continue; 0230 } 0231 /* switch to UTF-7 mode */ 0232 if (!utf7mode) { 0233 dst += '&'; 0234 utf7mode = 1; 0235 } 0236 /* Encode US-ASCII characters as themselves */ 0237 if (c < 0x80) { 0238 ucs4 = c; 0239 utf8total = 1; 0240 } else if (utf8total) { 0241 /* save UTF8 bits into UCS4 */ 0242 ucs4 = (ucs4 << 6) | (c & 0x3FUL); 0243 if (++utf8pos < utf8total) { 0244 continue; 0245 } 0246 } else { 0247 utf8pos = 1; 0248 if (c < 0xE0) { 0249 utf8total = 2; 0250 ucs4 = c & 0x1F; 0251 } else if (c < 0xF0) { 0252 utf8total = 3; 0253 ucs4 = c & 0x0F; 0254 } else { 0255 /* NOTE: can't convert UTF8 sequences longer than 4 */ 0256 utf8total = 4; 0257 ucs4 = c & 0x03; 0258 } 0259 continue; 0260 } 0261 /* loop to split ucs4 into two utf16 chars if necessary */ 0262 utf8total = 0; 0263 do { 0264 if (ucs4 >= UTF16BASE) { 0265 ucs4 -= UTF16BASE; 0266 bitbuf = 0267 (bitbuf << 16) | ((ucs4 >> UTF16SHIFT) + UTF16HIGHSTART); 0268 ucs4 = (ucs4 & UTF16MASK) + UTF16LOSTART; 0269 utf16flag = 1; 0270 } else { 0271 bitbuf = (bitbuf << 16) | ucs4; 0272 utf16flag = 0; 0273 } 0274 bitstogo += 16; 0275 /* spew out base64 */ 0276 while (bitstogo >= 6) { 0277 bitstogo -= 6; 0278 dst += 0279 base64chars[(bitstogo ? (bitbuf >> bitstogo) : bitbuf) & 0x3F]; 0280 } 0281 } while (utf16flag); 0282 } 0283 /* if in UTF-7 mode, finish in ASCII */ 0284 if (utf7mode) { 0285 if (bitstogo) { 0286 dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F]; 0287 } 0288 dst += '-'; 0289 } 0290 return quoteIMAP(dst); 0291 } 0292 0293 //----------------------------------------------------------------------------- 0294 QTextCodec *KIMAP2::codecForName(const QString &str) 0295 { 0296 if (str.isEmpty()) { 0297 return Q_NULLPTR; 0298 } 0299 return QTextCodec::codecForName(str.toLower(). 0300 replace(QStringLiteral("windows"), QStringLiteral("cp")).toLatin1()); 0301 } 0302 0303 //----------------------------------------------------------------------------- 0304 const QString KIMAP2::decodeRFC2047String(const QString &str) 0305 { 0306 QString throw_away; 0307 0308 return decodeRFC2047String(str, throw_away); 0309 } 0310 0311 //----------------------------------------------------------------------------- 0312 const QString KIMAP2::decodeRFC2047String(const QString &str, 0313 QString &charset) 0314 { 0315 QString throw_away; 0316 0317 return decodeRFC2047String(str, charset, throw_away); 0318 } 0319 0320 //----------------------------------------------------------------------------- 0321 const QString KIMAP2::decodeRFC2047String(const QString &str, 0322 QString &charset, 0323 QString &language) 0324 { 0325 //do we have a rfc string 0326 if (!str.contains(QStringLiteral("=?"))) { 0327 return str; 0328 } 0329 0330 // FIXME get rid of the conversion? 0331 QByteArray aStr = str.toLatin1(); // QString.length() means Unicode chars 0332 QByteArray result; 0333 char *pos, *beg, *end, *mid = Q_NULLPTR; 0334 QByteArray cstr; 0335 char encoding = 0, ch; 0336 bool valid; 0337 const int maxLen = 200; 0338 int i; 0339 0340 // result.truncate(aStr.length()); 0341 for (pos = aStr.data(); *pos; pos++) { 0342 if (pos[0] != '=' || pos[1] != '?') { 0343 result += *pos; 0344 continue; 0345 } 0346 beg = pos + 2; 0347 end = beg; 0348 valid = true; 0349 // parse charset name 0350 for (i = 2, pos += 2; 0351 i < maxLen && 0352 (*pos != '?' && (ispunct(*pos) || isalnum(*pos))); 0353 i++) { 0354 pos++; 0355 } 0356 if (*pos != '?' || i < 4 || i >= maxLen) { 0357 valid = false; 0358 } else { 0359 charset = QLatin1String(QByteArray(beg, i - 1)); // -2 + 1 for the zero 0360 int pt = charset.lastIndexOf(QLatin1Char('*')); 0361 if (pt != -1) { 0362 // save language for later usage 0363 language = charset.right(charset.length() - pt - 1); 0364 0365 // tie off language as defined in rfc2047 0366 charset.truncate(pt); 0367 } 0368 // get encoding and check delimiting question marks 0369 encoding = toupper(pos[1]); 0370 if (pos[2] != '?' || 0371 (encoding != 'Q' && encoding != 'B' && 0372 encoding != 'q' && encoding != 'b')) { 0373 valid = false; 0374 } 0375 pos += 3; 0376 i += 3; 0377 // qCDebug(KIMAP2_LOG) << "Charset:" << charset << "- Language:" << language << "-'" << pos << "'"; 0378 } 0379 if (valid) { 0380 mid = pos; 0381 // search for end of encoded part 0382 while (i < maxLen && *pos && !(*pos == '?' && *(pos + 1) == '=')) { 0383 i++; 0384 pos++; 0385 } 0386 end = pos + 2;//end now points to the first char after the encoded string 0387 if (i >= maxLen || !*pos) { 0388 valid = false; 0389 } 0390 } 0391 if (valid) { 0392 ch = *pos; 0393 *pos = '\0'; 0394 cstr = QByteArray(mid).left((int)(mid - pos - 1)); 0395 if (encoding == 'Q') { 0396 // decode quoted printable text 0397 for (i = cstr.length() - 1; i >= 0; --i) { 0398 if (cstr[i] == '_') { 0399 cstr[i] = ' '; 0400 } 0401 } 0402 // qCDebug(KIMAP2_LOG) << "before QP '" 0403 // << cstr << "'"; 0404 cstr = KCodecs::quotedPrintableDecode(cstr); 0405 // qCDebug(KIMAP2_LOG) << "after QP '" 0406 // << cstr << "'"; 0407 } else { 0408 // decode base64 text 0409 cstr = QByteArray::fromBase64(cstr); 0410 } 0411 *pos = ch; 0412 int len = cstr.length(); 0413 for (i = 0; i < len; ++i) { 0414 result += cstr[i]; 0415 } 0416 0417 pos = end - 1; 0418 } else { 0419 // qCDebug(KIMAP2_LOG) << "invalid"; 0420 //result += "=?"; 0421 //pos = beg -1; // because pos gets increased shortly afterwards 0422 pos = beg - 2; 0423 result += *pos++; 0424 result += *pos; 0425 } 0426 } 0427 if (!charset.isEmpty()) { 0428 QTextCodec *aCodec = codecForName(QLatin1String(charset.toLatin1())); 0429 if (aCodec) { 0430 // qCDebug(KIMAP2_LOG) << "Codec is" << aCodec->name(); 0431 return aCodec->toUnicode(result); 0432 } 0433 } 0434 return QLatin1String(result); 0435 } 0436 0437 //----------------------------------------------------------------------------- 0438 const QString KIMAP2::encodeRFC2047String(const QString &str) 0439 { 0440 return QLatin1String(encodeRFC2047String(str.toLatin1())); 0441 } 0442 0443 //----------------------------------------------------------------------------- 0444 const QByteArray KIMAP2::encodeRFC2047String(const QByteArray &str) 0445 { 0446 if (str.isEmpty()) { 0447 return str; 0448 } 0449 0450 const signed char *latin = 0451 reinterpret_cast<const signed char *> 0452 (str.data()), *l, *start, *stop; 0453 char hexcode; 0454 int numQuotes, i; 0455 int rptr = 0; 0456 // My stats show this number results in 12 resize() out of 73,000 0457 int resultLen = 3 * str.length() / 2; 0458 QByteArray result(resultLen, '\0'); 0459 0460 while (*latin) { 0461 l = latin; 0462 start = latin; 0463 while (*l) { 0464 if (*l == 32) { 0465 start = l + 1; 0466 } 0467 if (*l < 0) { 0468 break; 0469 } 0470 l++; 0471 } 0472 if (*l) { 0473 numQuotes = 1; 0474 while (*l) { 0475 /* The encoded word must be limited to 75 character */ 0476 for (i = 0; i < 16; ++i) { 0477 if (*l == especials[i]) { 0478 numQuotes++; 0479 } 0480 } 0481 if (*l < 0) { 0482 numQuotes++; 0483 } 0484 /* Stop after 58 = 75 - 17 characters or at "<user@host..." */ 0485 if (l - start + 2 * numQuotes >= 58 || *l == 60) { 0486 break; 0487 } 0488 l++; 0489 } 0490 if (*l) { 0491 stop = l - 1; 0492 while (stop >= start && *stop != 32) { 0493 stop--; 0494 } 0495 if (stop <= start) { 0496 stop = l; 0497 } 0498 } else { 0499 stop = l; 0500 } 0501 if (resultLen - rptr - 1 <= start - latin + 1 + 16) { 0502 // =?iso-88... 0503 resultLen += (start - latin + 1) * 2 + 20; // more space 0504 result.resize(resultLen); 0505 } 0506 while (latin < start) { 0507 result[rptr++] = *latin; 0508 latin++; 0509 } 0510 result.replace(rptr, 15, "=?iso-8859-1?q?"); 0511 rptr += 15; 0512 if (resultLen - rptr - 1 <= 3 * (stop - latin + 1)) { 0513 resultLen += (stop - latin + 1) * 4 + 20; // more space 0514 result.resize(resultLen); 0515 } 0516 while (latin < stop) { 0517 // can add up to 3 chars/iteration 0518 numQuotes = 0; 0519 for (i = 0; i < 16; ++i) { 0520 if (*latin == especials[i]) { 0521 numQuotes = 1; 0522 } 0523 } 0524 if (*latin < 0) { 0525 numQuotes = 1; 0526 } 0527 if (numQuotes) { 0528 result[rptr++] = '='; 0529 hexcode = ((*latin & 0xF0) >> 4) + 48; 0530 if (hexcode >= 58) { 0531 hexcode += 7; 0532 } 0533 result[rptr++] = hexcode; 0534 hexcode = (*latin & 0x0F) + 48; 0535 if (hexcode >= 58) { 0536 hexcode += 7; 0537 } 0538 result[rptr++] = hexcode; 0539 } else { 0540 result[rptr++] = *latin; 0541 } 0542 latin++; 0543 } 0544 result[rptr++] = '?'; 0545 result[rptr++] = '='; 0546 } else { 0547 while (*latin) { 0548 if (rptr == resultLen - 1) { 0549 resultLen += 30; 0550 result.resize(resultLen); 0551 } 0552 result[rptr++] = *latin; 0553 latin++; 0554 } 0555 } 0556 } 0557 result[rptr] = 0; 0558 return result; 0559 } 0560 0561 //----------------------------------------------------------------------------- 0562 const QString KIMAP2::encodeRFC2231String(const QString &str) 0563 { 0564 if (str.isEmpty()) { 0565 return str; 0566 } 0567 0568 signed char *latin = (signed char *)calloc(1, str.length() + 1); 0569 char *latin_us = (char *)latin; 0570 strcpy(latin_us, str.toLatin1()); 0571 signed char *l = latin; 0572 char hexcode; 0573 int i; 0574 bool quote; 0575 while (*l) { 0576 if (*l < 0) { 0577 break; 0578 } 0579 l++; 0580 } 0581 if (!*l) { 0582 free(latin); 0583 return str; 0584 } 0585 QByteArray result; 0586 l = latin; 0587 while (*l) { 0588 quote = *l < 0; 0589 for (i = 0; i < 16; ++i) { 0590 if (*l == especials[i]) { 0591 quote = true; 0592 } 0593 } 0594 if (quote) { 0595 result += '%'; 0596 hexcode = ((*l & 0xF0) >> 4) + 48; 0597 if (hexcode >= 58) { 0598 hexcode += 7; 0599 } 0600 result += hexcode; 0601 hexcode = (*l & 0x0F) + 48; 0602 if (hexcode >= 58) { 0603 hexcode += 7; 0604 } 0605 result += hexcode; 0606 } else { 0607 result += *l; 0608 } 0609 l++; 0610 } 0611 free(latin); 0612 return QLatin1String(result); 0613 } 0614 0615 //----------------------------------------------------------------------------- 0616 const QString KIMAP2::decodeRFC2231String(const QString &str) 0617 { 0618 int p = str.indexOf(QLatin1Char('\'')); 0619 0620 //see if it is an rfc string 0621 if (p < 0) { 0622 return str; 0623 } 0624 0625 int l = str.lastIndexOf(QLatin1Char('\'')); 0626 0627 //second is language 0628 if (p >= l) { 0629 return str; 0630 } 0631 0632 //first is charset or empty 0633 //QString charset = str.left ( p ); 0634 QString st = str.mid(l + 1); 0635 //QString language = str.mid ( p + 1, l - p - 1 ); 0636 0637 //qCDebug(KIMAP2_LOG) << "Charset:" << charset << "Language:" << language; 0638 0639 char ch, ch2; 0640 p = 0; 0641 while (p < (int) st.length()) { 0642 if (st.at(p) == 37) { 0643 ch = st.at(p + 1).toLatin1() - 48; 0644 if (ch > 16) { 0645 ch -= 7; 0646 } 0647 ch2 = st.at(p + 2).toLatin1() - 48; 0648 if (ch2 > 16) { 0649 ch2 -= 7; 0650 } 0651 st.replace(p, 1, ch * 16 + ch2); 0652 st.remove(p + 1, 2); 0653 } 0654 p++; 0655 } 0656 return st; 0657 }