Parser/3rdparty/rfccodecs.cpp

0001 /**********************************************************************
0002  *
0003  *   rfccodecs.cpp - handler for various rfc/mime encodings
0004  *   Copyright (C) 2000 s.carstens@gmx.de
0005  *
0006  *   This library is free software; you can redistribute it and/or
0007  *   modify it under the terms of the GNU Library General Public
0008  *   License as published by the Free Software Foundation; either
0009  *   version 2 of the License, or (at your option) any later version.
0010  *
0011  *   This library is distributed in the hope that it will be useful,
0012  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
0013  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0014  *   Library General Public License for more details.
0015  *
0016  *   You should have received a copy of the GNU Library General Public License
0017  *   along with this library; see the file COPYING.LIB.  If not, write to
0018  *   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0019  *   Boston, MA 02110-1301, USA.
0020  *
0021  *********************************************************************/
0022 /**
0023  * @file
0024  * This file is part of the IMAP support library and defines the
0025  * RfcCodecs class.
0026  *
0027  * @brief
0028  * Defines the RfcCodecs class.
0029  *
0030  * @author Sven Carstens
0031  */
0032
0033 #include "rfccodecs.h"
0034
0035 #include <ctype.h>
0036 #include <sys/types.h>
0037
0038 #include <stdio.h>
0039 #include <stdlib.h>
0040
0041 #include <QtCore/QTextCodec>
0042 #include <QtCore/QBuffer>
0043 #include <QtCore/QByteArray>
0044 #include <QtCore/QLatin1Char>
0045 #include "kcodecs.h"
0046
0047 using namespace KIMAP;
0048
0049 // This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997.
0050 // adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000
0051
0052 //@cond PRIVATE
/*
 * Alphabet used for the base64 portions of encoded folder names. It differs
 * from the standard base64 alphabet in its last character: ',' instead of '/'.
 * Note that sizeof( base64chars ) is 65 because the string literal's
 * terminating NUL is part of the array.
 */
static const unsigned char base64chars[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
/* Marker stored in the decoding table for bytes that are not in the alphabet
   (valid digit values are 0..63). */
#define UNDEFINED 64
/* NOTE(review): not referenced by the code visible in this part of the file -
   confirm whether it is still needed. */
#define MAXLINE  76

/* UTF16 definitions */
/* Mask selecting the low 10 payload bits of a surrogate. */
#define UTF16MASK       0x03FFUL
/* Number of payload bits carried by each surrogate. */
#define UTF16SHIFT      10
/* First code point that needs a surrogate pair. */
#define UTF16BASE       0x10000UL
/* Inclusive range of high (leading) surrogates. */
#define UTF16HIGHSTART  0xD800UL
#define UTF16HIGHEND    0xDBFFUL
/* Inclusive range of low (trailing) surrogates. */
#define UTF16LOSTART    0xDC00UL
#define UTF16LOEND      0xDFFFUL
0066 //@endcond
0067
0068 //-----------------------------------------------------------------------------
0069 QString KIMAP::decodeImapFolderName( const QByteArray &src )
0070 {
0071   unsigned char c, i, bitcount;
0072   unsigned long ucs4, utf16, bitbuf;
0073   unsigned char base64[256], utf8[6];
0074   unsigned int srcPtr = 0;
0075   QByteArray dst;
0076   uint srcLen = src.length();
0077
0078   /* initialize modified base64 decoding table */
0079   memset( base64, UNDEFINED, sizeof( base64 ) );
0080   for ( i = 0; i < sizeof( base64chars ); ++i ) {
0081     base64[(int)base64chars[i]] = i;
0082   }
0083
0084   /* loop until end of string */
0085   while ( srcPtr < srcLen ) {
0086     c = src[srcPtr++];
0087     /* deal with literal characters and &- */
0088     if ( c != '&' || src[srcPtr] == '-' ) {
0089       /* encode literally */
0090       dst += c;
0091       /* skip over the '-' if this is an &- sequence */
0092       if ( c == '&' ) {
0093         srcPtr++;
0094       }
0095     } else {
0096       /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */
0097       bitbuf = 0;
0098       bitcount = 0;
0099       ucs4 = 0;
0100       while ( ( c = base64[(unsigned char)src[srcPtr]] ) != UNDEFINED ) {
0101         ++srcPtr;
0102         bitbuf = ( bitbuf << 6 ) | c;
0103         bitcount += 6;
0104         /* enough bits for a UTF-16 character? */
0105         if ( bitcount >= 16 ) {
0106           bitcount -= 16;
0107           utf16 = ( bitcount ? bitbuf >> bitcount : bitbuf ) & 0xffff;
0108           /* convert UTF16 to UCS4 */
0109           if ( utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND ) {
0110             ucs4 = ( utf16 - UTF16HIGHSTART ) << UTF16SHIFT;
0111             continue;
0112           } else if ( utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND ) {
0113             ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
0114           } else {
0115             ucs4 = utf16;
0116           }
0117           /* convert UTF-16 range of UCS4 to UTF-8 */
0118           if ( ucs4 <= 0x7fUL ) {
0119             utf8[0] = ucs4;
0120             i = 1;
0121           } else if ( ucs4 <= 0x7ffUL ) {
0122             utf8[0] = 0xc0 | ( ucs4 >> 6 );
0123             utf8[1] = 0x80 | ( ucs4 & 0x3f );
0124             i = 2;
0125           } else if ( ucs4 <= 0xffffUL ) {
0126             utf8[0] = 0xe0 | ( ucs4 >> 12 );
0127             utf8[1] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f );
0128             utf8[2] = 0x80 | ( ucs4 & 0x3f );
0129             i = 3;
0130           } else {
0131             utf8[0] = 0xf0 | ( ucs4 >> 18 );
0132             utf8[1] = 0x80 | ( ( ucs4 >> 12 ) & 0x3f );
0133             utf8[2] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f );
0134             utf8[3] = 0x80 | ( ucs4 & 0x3f );
0135             i = 4;
0136           }
0137           /* copy it */
0138           for ( c = 0; c < i; ++c ) {
0139             dst += utf8[c];
0140           }
0141         }
0142       }
0143       /* skip over trailing '-' in modified UTF-7 encoding */
0144       if ( src[srcPtr] == '-' ) {
0145         ++srcPtr;
0146       }
0147     }
0148   }
0149   return QString::fromUtf8( dst.data () );
0150 }
0151
0152 //-----------------------------------------------------------------------------
0153 QByteArray KIMAP::encodeImapFolderName( const QString &inSrc )
0154 {
0155   unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
0156   unsigned int ucs4, bitbuf;
0157   QByteArray src = inSrc.toUtf8 ();
0158   QByteArray dst;
0159
0160   int srcPtr = 0;
0161   utf7mode = 0;
0162   utf8total = 0;
0163   bitstogo = 0;
0164   utf8pos = 0;
0165   bitbuf = 0;
0166   ucs4 = 0;
0167   while ( srcPtr < src.length () ) {
0168     c = (unsigned char)src[srcPtr++];
0169     /* normal character? */
0170     if ( c >= ' ' && c <= '~' ) {
0171       /* switch out of UTF-7 mode */
0172       if ( utf7mode ) {
0173         if ( bitstogo ) {
0174           dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F];
0175           bitstogo = 0;
0176         }
0177         dst += '-';
0178         utf7mode = 0;
0179       }
0180       dst += c;
0181       /* encode '&' as '&-' */
0182       if ( c == '&' ) {
0183         dst += '-';
0184       }
0185       continue;
0186     }
0187     /* switch to UTF-7 mode */
0188     if ( !utf7mode ) {
0189       dst += '&';
0190       utf7mode = 1;
0191     }
0192     /* Encode US-ASCII characters as themselves */
0193     if ( c < 0x80 ) {
0194       ucs4 = c;
0195       utf8total = 1;
0196     } else if ( utf8total ) {
0197       /* save UTF8 bits into UCS4 */
0198       ucs4 = ( ucs4 << 6 ) | ( c & 0x3FUL );
0199       if ( ++utf8pos < utf8total ) {
0200         continue;
0201       }
0202     } else {
0203       utf8pos = 1;
0204       if ( c < 0xE0 ) {
0205         utf8total = 2;
0206         ucs4 = c & 0x1F;
0207       } else if ( c < 0xF0 ) {
0208         utf8total = 3;
0209         ucs4 = c & 0x0F;
0210       } else {
0211         /* NOTE: can't convert UTF8 sequences longer than 4 */
0212         utf8total = 4;
0213         ucs4 = c & 0x03;
0214       }
0215       continue;
0216     }
0217     /* loop to split ucs4 into two utf16 chars if necessary */
0218     utf8total = 0;
0219     do
0220     {
0221       if ( ucs4 >= UTF16BASE ) {
0222         ucs4 -= UTF16BASE;
0223         bitbuf =
0224           ( bitbuf << 16 ) | ( ( ucs4 >> UTF16SHIFT ) + UTF16HIGHSTART );
0225         ucs4 = ( ucs4 & UTF16MASK ) + UTF16LOSTART;
0226         utf16flag = 1;
0227       } else {
0228         bitbuf = ( bitbuf << 16 ) | ucs4;
0229         utf16flag = 0;
0230       }
0231       bitstogo += 16;
0232       /* spew out base64 */
0233       while ( bitstogo >= 6 ) {
0234         bitstogo -= 6;
0235         dst +=
0236           base64chars[( bitstogo ? ( bitbuf >> bitstogo ) : bitbuf ) & 0x3F];
0237       }
0238     }
0239     while ( utf16flag );
0240   }
0241   /* if in UTF-7 mode, finish in ASCII */
0242   if ( utf7mode ) {
0243     if ( bitstogo ) {
0244       dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F];
0245     }
0246     dst += '-';
0247   }
0248   return dst;
0249 }