// File indexing completed on 2024-09-15 11:55:01
0001 /* -*- c++ -*- 0002 SPDX-FileCopyrightText: 2001 Marc Mutz <mutz@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 /** 0007 @file 0008 This file is part of the API for handling @ref MIME data and 0009 defines the @ref Base64 and @ref RFC2047B @ref Codec classes. 0010 0011 @brief 0012 Defines the Base64Codec and Rfc2047BEncodingCodec classes. 0013 0014 @authors Marc Mutz \<mutz@kde.org\> 0015 */ 0016 0017 #include "kcodecsbase64.h" 0018 #include "kcodecs_p.h" 0019 0020 #include <QDebug> 0021 0022 #include <cassert> 0023 0024 using namespace KCodecs; 0025 0026 namespace KCodecs 0027 { 0028 // codec for base64 as specified in RFC 2045 0029 // class Base64Codec; 0030 // class Base64Decoder; 0031 // class Base64Encoder; 0032 0033 // codec for the B encoding as specified in RFC 2047 0034 // class Rfc2047BEncodingCodec; 0035 // class Rfc2047BEncodingEncoder; 0036 // class Rfc2047BEncodingDecoder; 0037 0038 //@cond PRIVATE 0039 static const uchar base64DecodeMap[128] = { 0040 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0041 0042 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 64, 64, 64, 0043 0044 64, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 64, 0045 0046 64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64}; 0047 0048 static const char base64EncodeMap[64] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 0049 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 0050 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'}; 0051 //@endcond 0052 0053 class Base64Decoder : public Decoder 0054 { 
0055 uint mStepNo; 0056 uchar mOutbits; 0057 bool mSawPadding : 1; 0058 0059 protected: 0060 friend class Base64Codec; 0061 Base64Decoder(Codec::NewlineType newline = Codec::NewlineLF) 0062 : Decoder(newline) 0063 , mStepNo(0) 0064 , mOutbits(0) 0065 , mSawPadding(false) 0066 { 0067 } 0068 0069 public: 0070 ~Base64Decoder() override 0071 { 0072 } 0073 0074 bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; 0075 // ### really needs no finishing??? 0076 bool finish(char *&dcursor, const char *const dend) override 0077 { 0078 Q_UNUSED(dcursor); 0079 Q_UNUSED(dend); 0080 return true; 0081 } 0082 }; 0083 0084 class Base64Encoder : public Encoder 0085 { 0086 uint mStepNo; 0087 /** number of already written base64-quartets on current line */ 0088 uint mWrittenPacketsOnThisLine; 0089 uchar mNextbits; 0090 bool mInsideFinishing : 1; 0091 0092 protected: 0093 friend class Rfc2047BEncodingCodec; 0094 friend class Rfc2047BEncodingEncoder; 0095 friend class Base64Codec; 0096 Base64Encoder(Codec::NewlineType newline = Codec::NewlineLF) 0097 : Encoder(newline) 0098 , mStepNo(0) 0099 , mWrittenPacketsOnThisLine(0) 0100 , mNextbits(0) 0101 , mInsideFinishing(false) 0102 { 0103 } 0104 0105 bool generic_finish(char *&dcursor, const char *const dend, bool withLFatEnd); 0106 0107 public: 0108 ~Base64Encoder() override 0109 { 0110 } 0111 0112 bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; 0113 0114 bool finish(char *&dcursor, const char *const dend) override; 0115 0116 protected: 0117 bool writeBase64(uchar ch, char *&dcursor, const char *const dend) 0118 { 0119 return write(base64EncodeMap[ch], dcursor, dend); 0120 } 0121 }; 0122 0123 class Rfc2047BEncodingEncoder : public Base64Encoder 0124 { 0125 protected: 0126 friend class Rfc2047BEncodingCodec; 0127 Rfc2047BEncodingEncoder(Codec::NewlineType newline = Codec::NewlineLF) 0128 : Base64Encoder(newline) 0129 { 0130 } 
0131 0132 public: 0133 bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override; 0134 bool finish(char *&dcursor, const char *const dend) override; 0135 }; 0136 0137 Encoder *Base64Codec::makeEncoder(Codec::NewlineType newline) const 0138 { 0139 return new Base64Encoder(newline); 0140 } 0141 0142 Decoder *Base64Codec::makeDecoder(Codec::NewlineType newline) const 0143 { 0144 return new Base64Decoder(newline); 0145 } 0146 0147 Encoder *Rfc2047BEncodingCodec::makeEncoder(Codec::NewlineType newline) const 0148 { 0149 return new Rfc2047BEncodingEncoder(newline); 0150 } 0151 0152 /********************************************************/ 0153 /********************************************************/ 0154 /********************************************************/ 0155 0156 bool Base64Decoder::decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) 0157 { 0158 while (dcursor != dend && scursor != send) { 0159 uchar ch = *scursor++; 0160 uchar value; 0161 0162 // try converting ch to a 6-bit value: 0163 if (ch < 128) { 0164 value = base64DecodeMap[ch]; 0165 } else { 0166 value = 64; 0167 } 0168 0169 // ch isn't of the base64 alphabet, check for other significant chars: 0170 if (value >= 64) { 0171 if (ch == '=') { 0172 // padding: 0173 if (mStepNo == 0 || mStepNo == 1) { 0174 if (!mSawPadding) { 0175 // malformed 0176 // qWarning() << "Base64Decoder: unexpected padding" 0177 // "character in input stream"; 0178 } 0179 mSawPadding = true; 0180 break; 0181 } else if (mStepNo == 2) { 0182 // ok, there should be another one 0183 } else if (mStepNo == 3) { 0184 // ok, end of encoded stream 0185 mSawPadding = true; 0186 break; 0187 } 0188 mSawPadding = true; 0189 mStepNo = (mStepNo + 1) % 4; 0190 continue; 0191 } else { 0192 // non-base64 alphabet 0193 continue; 0194 } 0195 } 0196 0197 if (mSawPadding) { 0198 // qWarning() << "Base64Decoder: Embedded padding character" 0199 // "encountered!"; 
0200 return true; 0201 } 0202 0203 // add the new bits to the output stream and flush full octets: 0204 switch (mStepNo) { 0205 case 0: 0206 mOutbits = value << 2; 0207 break; 0208 case 1: 0209 *dcursor++ = (char)(mOutbits | value >> 4); 0210 mOutbits = value << 4; 0211 break; 0212 case 2: 0213 *dcursor++ = (char)(mOutbits | value >> 2); 0214 mOutbits = value << 6; 0215 break; 0216 case 3: 0217 *dcursor++ = (char)(mOutbits | value); 0218 mOutbits = 0; 0219 break; 0220 default: 0221 assert(0); 0222 } 0223 mStepNo = (mStepNo + 1) % 4; 0224 } 0225 0226 // return false when caller should call us again: 0227 return scursor == send; 0228 } // Base64Decoder::decode() 0229 0230 bool Base64Encoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) 0231 { 0232 const uint maxPacketsPerLine = 76 / 4; 0233 0234 // detect when the caller doesn't adhere to our rules: 0235 if (mInsideFinishing) { 0236 return true; 0237 } 0238 0239 while (scursor != send && dcursor != dend) { 0240 // properly empty the output buffer before starting something new: 0241 // ### fixme: we can optimize this away, since the buffer isn't 0242 // written to anyway (most of the time) 0243 if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { 0244 return scursor == send; 0245 } 0246 0247 uchar ch = *scursor++; 0248 // mNextbits // (part of) value of next sextet 0249 0250 // check for line length; 0251 if (mStepNo == 0 && mWrittenPacketsOnThisLine >= maxPacketsPerLine) { 0252 writeCRLF(dcursor, dend); 0253 mWrittenPacketsOnThisLine = 0; 0254 } 0255 0256 // depending on mStepNo, extract value and mNextbits from the 0257 // octet stream: 0258 switch (mStepNo) { 0259 case 0: 0260 assert(mNextbits == 0); 0261 writeBase64(ch >> 2, dcursor, dend); // top-most 6 bits -> output 0262 mNextbits = (ch & 0x3) << 4; // 0..1 bits -> 4..5 in mNextbits 0263 break; 0264 case 1: 0265 assert((mNextbits & ~0x30) == 0); 0266 writeBase64(mNextbits | ch >> 4, dcursor, dend); // 
4..7 bits -> 0..3 in value 0267 mNextbits = (ch & 0xf) << 2; // 0..3 bits -> 2..5 in mNextbits 0268 break; 0269 case 2: 0270 assert((mNextbits & ~0x3C) == 0); 0271 writeBase64(mNextbits | ch >> 6, dcursor, dend); // 6..7 bits -> 0..1 in value 0272 writeBase64(ch & 0x3F, dcursor, dend); // 0..5 bits -> output 0273 mNextbits = 0; 0274 mWrittenPacketsOnThisLine++; 0275 break; 0276 default: 0277 assert(0); 0278 } 0279 mStepNo = (mStepNo + 1) % 3; 0280 } 0281 0282 if (d->outputBufferCursor) { 0283 flushOutputBuffer(dcursor, dend); 0284 } 0285 0286 return scursor == send; 0287 } 0288 0289 bool Rfc2047BEncodingEncoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) 0290 { 0291 // detect when the caller doesn't adhere to our rules: 0292 if (mInsideFinishing) { 0293 return true; 0294 } 0295 0296 while (scursor != send && dcursor != dend) { 0297 // properly empty the output buffer before starting something new: 0298 // ### fixme: we can optimize this away, since the buffer isn't 0299 // written to anyway (most of the time) 0300 if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { 0301 return scursor == send; 0302 } 0303 0304 uchar ch = *scursor++; 0305 // mNextbits // (part of) value of next sextet 0306 0307 // depending on mStepNo, extract value and mNextbits from the 0308 // octet stream: 0309 switch (mStepNo) { 0310 case 0: 0311 assert(mNextbits == 0); 0312 writeBase64(ch >> 2, dcursor, dend); // top-most 6 bits -> output 0313 mNextbits = (ch & 0x3) << 4; // 0..1 bits -> 4..5 in mNextbits 0314 break; 0315 case 1: 0316 assert((mNextbits & ~0x30) == 0); 0317 writeBase64(mNextbits | ch >> 4, dcursor, dend); // 4..7 bits -> 0..3 in value 0318 mNextbits = (ch & 0xf) << 2; // 0..3 bits -> 2..5 in mNextbits 0319 break; 0320 case 2: 0321 assert((mNextbits & ~0x3C) == 0); 0322 writeBase64(mNextbits | ch >> 6, dcursor, dend); // 6..7 bits -> 0..1 in value 0323 writeBase64(ch & 0x3F, dcursor, dend); // 0..5 bits -> output 
0324 mNextbits = 0; 0325 break; 0326 default: 0327 assert(0); 0328 } 0329 mStepNo = (mStepNo + 1) % 3; 0330 } 0331 0332 if (d->outputBufferCursor) { 0333 flushOutputBuffer(dcursor, dend); 0334 } 0335 0336 return scursor == send; 0337 } 0338 0339 bool Base64Encoder::finish(char *&dcursor, const char *const dend) 0340 { 0341 return generic_finish(dcursor, dend, true); 0342 } 0343 0344 bool Rfc2047BEncodingEncoder::finish(char *&dcursor, const char *const dend) 0345 { 0346 return generic_finish(dcursor, dend, false); 0347 } 0348 0349 bool Base64Encoder::generic_finish(char *&dcursor, const char *const dend, bool withLFatEnd) 0350 { 0351 if (mInsideFinishing) { 0352 return flushOutputBuffer(dcursor, dend); 0353 } 0354 0355 if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) { 0356 return false; 0357 } 0358 0359 mInsideFinishing = true; 0360 0361 // 0362 // writing out the last mNextbits... 0363 // 0364 switch (mStepNo) { 0365 case 1: // 2 mNextbits waiting to be written. Needs two padding chars: 0366 case 2: // 4 or 6 mNextbits waiting to be written. Completes a block 0367 writeBase64(mNextbits, dcursor, dend); 0368 mNextbits = 0; 0369 break; 0370 case 0: // no padding, nothing to be written, except possibly the CRLF 0371 assert(mNextbits == 0); 0372 break; 0373 default: 0374 assert(0); 0375 } 0376 0377 // 0378 // adding padding... 0379 // 0380 switch (mStepNo) { 0381 case 1: 0382 write('=', dcursor, dend); 0383 Q_FALLTHROUGH(); 0384 // fall through: 0385 case 2: 0386 write('=', dcursor, dend); 0387 Q_FALLTHROUGH(); 0388 // fall through: 0389 case 0: // completed an quartet - add CRLF 0390 if (withLFatEnd) { 0391 writeCRLF(dcursor, dend); 0392 } 0393 return flushOutputBuffer(dcursor, dend); 0394 default: 0395 assert(0); 0396 } 0397 return true; // asserts get compiled out 0398 } 0399 0400 } // namespace KCodecs