kimap/src/rfccodecs.cpp

0001 /**********************************************************************
0002  *
0003  *   rfccodecs.cpp - handler for various rfc/mime encodings
0004  *   SPDX-FileCopyrightText: 2000 s.carstens@gmx.de
0005  *
0006  *   SPDX-License-Identifier: LGPL-2.0-or-later
0007  *
0008  *********************************************************************/
0009 /**
0010  * @file
0011  * This file is part of the IMAP support library and defines the
0012  * RfcCodecs class.
0013  *
0014  * @brief
0015  * Defines the RfcCodecs class.
0016  *
0017  * @author Sven Carstens
0018  */
0019
0020 #include "rfccodecs.h"
0021
0022 #include <ctype.h>
0023 #include <sys/types.h>
0024
0025 #include <stdio.h>
0026 #include <stdlib.h>
0027
0028 #include <QByteArray>
0029 #include <QLatin1Char>
0030
0031 using namespace KIMAP;
0032
0033 // This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997.
0034 // adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000
0035
//@cond PRIVATE
/*
 * Alphabet of the modified base64 used for IMAP folder names: the usual
 * letters and digits followed by '+' and ',' (',' where standard base64
 * has '/'). The position of a character is its 6-bit value.
 */
static const unsigned char base64chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+,";
/* Decoding-table marker for a byte that is not in the alphabet above */
#define UNDEFINED 64
/* NOTE(review): not referenced in the visible part of this file */
#define MAXLINE 76
/* NOTE(review): presumably the RFC 2047 "especials" set; not referenced in the visible part of this file */
static const char especials[17] = "()<>@,;:\"/[]?.= ";

/* UTF-16 surrogate arithmetic */
/* low ten bits of a code point offset, carried by the low surrogate */
#define UTF16MASK 0x03FFUL
/* number of bits carried by each surrogate */
#define UTF16SHIFT 10
/* first code point that needs a surrogate pair */
#define UTF16BASE 0x10000UL
/* high (leading) surrogate range */
#define UTF16HIGHSTART 0xD800UL
#define UTF16HIGHEND 0xDBFFUL
/* low (trailing) surrogate range */
#define UTF16LOSTART 0xDC00UL
#define UTF16LOEND 0xDFFFUL
//@endcond
0051
0052 //-----------------------------------------------------------------------------
0053 QByteArray KIMAP::decodeImapFolderName(const QByteArray &inSrc)
0054 {
0055     unsigned char c;
0056     unsigned char i;
0057     unsigned char bitcount;
0058     unsigned long ucs4;
0059     unsigned long utf16;
0060     unsigned long bitbuf;
0061     unsigned char base64[256];
0062     unsigned char utf8[6];
0063     unsigned int srcPtr = 0;
0064     QByteArray dst;
0065     QByteArray src = inSrc;
0066     uint srcLen = inSrc.length();
0067
0068     /* initialize modified base64 decoding table */
0069     memset(base64, UNDEFINED, sizeof(base64));
0070     for (i = 0; i < sizeof(base64chars); ++i) {
0071         base64[(int)base64chars[i]] = i;
0072     }
0073
0074     /* loop until end of string */
0075     while (srcPtr < srcLen) {
0076         c = src[srcPtr++];
0077         /* deal with literal characters and &- */
0078         if (c != '&' || src[srcPtr] == '-') {
0079             /* encode literally */
0080             dst += c;
0081             /* skip over the '-' if this is an &- sequence */
0082             if (c == '&') {
0083                 srcPtr++;
0084             }
0085         } else {
0086             /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */
0087             bitbuf = 0;
0088             bitcount = 0;
0089             ucs4 = 0;
0090             while ((c = base64[(unsigned char)src[srcPtr]]) != UNDEFINED) {
0091                 ++srcPtr;
0092                 bitbuf = (bitbuf << 6) | c;
0093                 bitcount += 6;
0094                 /* enough bits for a UTF-16 character? */
0095                 if (bitcount >= 16) {
0096                     bitcount -= 16;
0097                     utf16 = (bitcount ? bitbuf >> bitcount : bitbuf) & 0xffff;
0098                     /* convert UTF16 to UCS4 */
0099                     if (utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND) {
0100                         ucs4 = (utf16 - UTF16HIGHSTART) << UTF16SHIFT;
0101                         continue;
0102                     } else if (utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND) {
0103                         ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
0104                     } else {
0105                         ucs4 = utf16;
0106                     }
0107                     /* convert UTF-16 range of UCS4 to UTF-8 */
0108                     if (ucs4 <= 0x7fUL) {
0109                         utf8[0] = ucs4;
0110                         i = 1;
0111                     } else if (ucs4 <= 0x7ffUL) {
0112                         utf8[0] = 0xc0 | (ucs4 >> 6);
0113                         utf8[1] = 0x80 | (ucs4 & 0x3f);
0114                         i = 2;
0115                     } else if (ucs4 <= 0xffffUL) {
0116                         utf8[0] = 0xe0 | (ucs4 >> 12);
0117                         utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f);
0118                         utf8[2] = 0x80 | (ucs4 & 0x3f);
0119                         i = 3;
0120                     } else {
0121                         utf8[0] = 0xf0 | (ucs4 >> 18);
0122                         utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f);
0123                         utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f);
0124                         utf8[3] = 0x80 | (ucs4 & 0x3f);
0125                         i = 4;
0126                     }
0127                     /* copy it */
0128                     for (c = 0; c < i; ++c) {
0129                         dst += utf8[c];
0130                     }
0131                 }
0132             }
0133             /* skip over trailing '-' in modified UTF-7 encoding */
0134             if (src[srcPtr] == '-') {
0135                 ++srcPtr;
0136             }
0137         }
0138     }
0139     return dst;
0140 }
0141
0142 QString KIMAP::decodeImapFolderName(const QString &inSrc)
0143 {
0144     return QString::fromUtf8(decodeImapFolderName(inSrc.toUtf8()).constData());
0145 }
0146
0147 //-----------------------------------------------------------------------------
0148
0149 QByteArray KIMAP::quoteIMAP(const QByteArray &src)
0150 {
0151     int len = src.length();
0152     QByteArray result;
0153     result.reserve(2 * len);
0154     for (int i = 0; i < len; i++) {
0155         if (src[i] == '"' || src[i] == '\\') {
0156             result += '\\';
0157         }
0158         result += src[i];
0159     }
0160     result.squeeze();
0161     return result;
0162 }
0163
0164 QString KIMAP::quoteIMAP(const QString &src)
0165 {
0166     uint len = src.length();
0167     QString result;
0168     result.reserve(2 * len);
0169     for (unsigned int i = 0; i < len; i++) {
0170         if (src[i] == QLatin1Char('"') || src[i] == QLatin1Char('\\')) {
0171             result += QLatin1Char('\\');
0172         }
0173         result += src[i];
0174     }
0175     // result.squeeze(); - unnecessary and slow
0176     return result;
0177 }
0178
0179 //-----------------------------------------------------------------------------
0180 QString KIMAP::encodeImapFolderName(const QString &inSrc)
0181 {
0182     return QString::fromUtf8(encodeImapFolderName(inSrc.toUtf8()).constData());
0183 }
0184
0185 QByteArray KIMAP::encodeImapFolderName(const QByteArray &inSrc)
0186 {
0187     unsigned int utf8pos;
0188     unsigned int utf8total;
0189     unsigned int c;
0190     unsigned int utf7mode;
0191     unsigned int bitstogo;
0192     unsigned int utf16flag;
0193     unsigned int ucs4;
0194     unsigned int bitbuf;
0195     QByteArray src = inSrc;
0196     QByteArray dst;
0197
0198     int srcPtr = 0;
0199     utf7mode = 0;
0200     utf8total = 0;
0201     bitstogo = 0;
0202     utf8pos = 0;
0203     bitbuf = 0;
0204     ucs4 = 0;
0205     while (srcPtr < src.length()) {
0206         c = (unsigned char)src[srcPtr++];
0207         /* normal character? */
0208         if (c >= ' ' && c <= '~') {
0209             /* switch out of UTF-7 mode */
0210             if (utf7mode) {
0211                 if (bitstogo) {
0212                     dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
0213                     bitstogo = 0;
0214                 }
0215                 dst += '-';
0216                 utf7mode = 0;
0217             }
0218             dst += c;
0219             /* encode '&' as '&-' */
0220             if (c == '&') {
0221                 dst += '-';
0222             }
0223             continue;
0224         }
0225         /* switch to UTF-7 mode */
0226         if (!utf7mode) {
0227             dst += '&';
0228             utf7mode = 1;
0229         }
0230         /* Encode US-ASCII characters as themselves */
0231         if (c < 0x80) {
0232             ucs4 = c;
0233             utf8total = 1;
0234         } else if (utf8total) {
0235             /* save UTF8 bits into UCS4 */
0236             ucs4 = (ucs4 << 6) | (c & 0x3FUL);
0237             if (++utf8pos < utf8total) {
0238                 continue;
0239             }
0240         } else {
0241             utf8pos = 1;
0242             if (c < 0xE0) {
0243                 utf8total = 2;
0244                 ucs4 = c & 0x1F;
0245             } else if (c < 0xF0) {
0246                 utf8total = 3;
0247                 ucs4 = c & 0x0F;
0248             } else {
0249                 /* NOTE: can't convert UTF8 sequences longer than 4 */
0250                 utf8total = 4;
0251                 ucs4 = c & 0x03;
0252             }
0253             continue;
0254         }
0255         /* loop to split ucs4 into two utf16 chars if necessary */
0256         utf8total = 0;
0257         do {
0258             if (ucs4 >= UTF16BASE) {
0259                 ucs4 -= UTF16BASE;
0260                 bitbuf = (bitbuf << 16) | ((ucs4 >> UTF16SHIFT) + UTF16HIGHSTART);
0261                 ucs4 = (ucs4 & UTF16MASK) + UTF16LOSTART;
0262                 utf16flag = 1;
0263             } else {
0264                 bitbuf = (bitbuf << 16) | ucs4;
0265                 utf16flag = 0;
0266             }
0267             bitstogo += 16;
0268             /* spew out base64 */
0269             while (bitstogo >= 6) {
0270                 bitstogo -= 6;
0271                 dst += base64chars[(bitstogo ? (bitbuf >> bitstogo) : bitbuf) & 0x3F];
0272             }
0273         } while (utf16flag);
0274     }
0275     /* if in UTF-7 mode, finish in ASCII */
0276     if (utf7mode) {
0277         if (bitstogo) {
0278             dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
0279         }
0280         dst += '-';
0281     }
0282     return quoteIMAP(dst);
0283 }