File indexing completed on 2024-09-15 11:55:01

0001 /*  -*- c++ -*-
0002     SPDX-FileCopyrightText: 2001 Marc Mutz <mutz@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 /**
0007   @file
0008   This file is part of the API for handling @ref MIME data and
0009   defines the @ref Base64 and @ref RFC2047B @ref Codec classes.
0010 
0011   @brief
0012   Defines the Base64Codec and Rfc2047BEncodingCodec classes.
0013 
0014   @authors Marc Mutz \<mutz@kde.org\>
0015 */
0016 
0017 #include "kcodecsbase64.h"
0018 #include "kcodecs_p.h"
0019 
0020 #include <QDebug>
0021 
0022 #include <cassert>
0023 
0024 using namespace KCodecs;
0025 
0026 namespace KCodecs
0027 {
0028 // codec for base64 as specified in RFC 2045
0029 // class Base64Codec;
0030 // class Base64Decoder;
0031 // class Base64Encoder;
0032 
0033 // codec for the B encoding as specified in RFC 2047
0034 // class Rfc2047BEncodingCodec;
0035 // class Rfc2047BEncodingEncoder;
0036 // class Rfc2047BEncodingDecoder;
0037 
0038 //@cond PRIVATE
0039 static const uchar base64DecodeMap[128] = {
0040     64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
0041 
0042     64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 64, 64, 64,
0043 
0044     64, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 64,
0045 
0046     64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64};
0047 
// Forward lookup table: maps a 6-bit value (0..63) to its base64 character.
static const char base64EncodeMap[64] = {
    // 0..25: upper-case letters
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 26..51: lower-case letters
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // 52..61: digits
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 62, 63: the two remaining symbols
    '+', '/',
};
0051 //@endcond
0052 
0053 class Base64Decoder : public Decoder
0054 {
0055     uint mStepNo;
0056     uchar mOutbits;
0057     bool mSawPadding : 1;
0058 
0059 protected:
0060     friend class Base64Codec;
0061     Base64Decoder(Codec::NewlineType newline = Codec::NewlineLF)
0062         : Decoder(newline)
0063         , mStepNo(0)
0064         , mOutbits(0)
0065         , mSawPadding(false)
0066     {
0067     }
0068 
0069 public:
0070     ~Base64Decoder() override
0071     {
0072     }
0073 
0074     bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override;
0075     // ### really needs no finishing???
0076     bool finish(char *&dcursor, const char *const dend) override
0077     {
0078         Q_UNUSED(dcursor);
0079         Q_UNUSED(dend);
0080         return true;
0081     }
0082 };
0083 
0084 class Base64Encoder : public Encoder
0085 {
0086     uint mStepNo;
0087     /** number of already written base64-quartets on current line */
0088     uint mWrittenPacketsOnThisLine;
0089     uchar mNextbits;
0090     bool mInsideFinishing : 1;
0091 
0092 protected:
0093     friend class Rfc2047BEncodingCodec;
0094     friend class Rfc2047BEncodingEncoder;
0095     friend class Base64Codec;
0096     Base64Encoder(Codec::NewlineType newline = Codec::NewlineLF)
0097         : Encoder(newline)
0098         , mStepNo(0)
0099         , mWrittenPacketsOnThisLine(0)
0100         , mNextbits(0)
0101         , mInsideFinishing(false)
0102     {
0103     }
0104 
0105     bool generic_finish(char *&dcursor, const char *const dend, bool withLFatEnd);
0106 
0107 public:
0108     ~Base64Encoder() override
0109     {
0110     }
0111 
0112     bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override;
0113 
0114     bool finish(char *&dcursor, const char *const dend) override;
0115 
0116 protected:
0117     bool writeBase64(uchar ch, char *&dcursor, const char *const dend)
0118     {
0119         return write(base64EncodeMap[ch], dcursor, dend);
0120     }
0121 };
0122 
0123 class Rfc2047BEncodingEncoder : public Base64Encoder
0124 {
0125 protected:
0126     friend class Rfc2047BEncodingCodec;
0127     Rfc2047BEncodingEncoder(Codec::NewlineType newline = Codec::NewlineLF)
0128         : Base64Encoder(newline)
0129     {
0130     }
0131 
0132 public:
0133     bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override;
0134     bool finish(char *&dcursor, const char *const dend) override;
0135 };
0136 
0137 Encoder *Base64Codec::makeEncoder(Codec::NewlineType newline) const
0138 {
0139     return new Base64Encoder(newline);
0140 }
0141 
0142 Decoder *Base64Codec::makeDecoder(Codec::NewlineType newline) const
0143 {
0144     return new Base64Decoder(newline);
0145 }
0146 
0147 Encoder *Rfc2047BEncodingCodec::makeEncoder(Codec::NewlineType newline) const
0148 {
0149     return new Rfc2047BEncodingEncoder(newline);
0150 }
0151 
0152 /********************************************************/
0153 /********************************************************/
0154 /********************************************************/
0155 
0156 bool Base64Decoder::decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend)
0157 {
0158     while (dcursor != dend && scursor != send) {
0159         uchar ch = *scursor++;
0160         uchar value;
0161 
0162         // try converting ch to a 6-bit value:
0163         if (ch < 128) {
0164             value = base64DecodeMap[ch];
0165         } else {
0166             value = 64;
0167         }
0168 
0169         // ch isn't of the base64 alphabet, check for other significant chars:
0170         if (value >= 64) {
0171             if (ch == '=') {
0172                 // padding:
0173                 if (mStepNo == 0 || mStepNo == 1) {
0174                     if (!mSawPadding) {
0175                         // malformed
0176                         // qWarning() << "Base64Decoder: unexpected padding"
0177                         //              "character in input stream";
0178                     }
0179                     mSawPadding = true;
0180                     break;
0181                 } else if (mStepNo == 2) {
0182                     // ok, there should be another one
0183                 } else if (mStepNo == 3) {
0184                     // ok, end of encoded stream
0185                     mSawPadding = true;
0186                     break;
0187                 }
0188                 mSawPadding = true;
0189                 mStepNo = (mStepNo + 1) % 4;
0190                 continue;
0191             } else {
0192                 // non-base64 alphabet
0193                 continue;
0194             }
0195         }
0196 
0197         if (mSawPadding) {
0198             // qWarning() << "Base64Decoder: Embedded padding character"
0199             //              "encountered!";
0200             return true;
0201         }
0202 
0203         // add the new bits to the output stream and flush full octets:
0204         switch (mStepNo) {
0205         case 0:
0206             mOutbits = value << 2;
0207             break;
0208         case 1:
0209             *dcursor++ = (char)(mOutbits | value >> 4);
0210             mOutbits = value << 4;
0211             break;
0212         case 2:
0213             *dcursor++ = (char)(mOutbits | value >> 2);
0214             mOutbits = value << 6;
0215             break;
0216         case 3:
0217             *dcursor++ = (char)(mOutbits | value);
0218             mOutbits = 0;
0219             break;
0220         default:
0221             assert(0);
0222         }
0223         mStepNo = (mStepNo + 1) % 4;
0224     }
0225 
0226     // return false when caller should call us again:
0227     return scursor == send;
0228 } // Base64Decoder::decode()
0229 
0230 bool Base64Encoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend)
0231 {
0232     const uint maxPacketsPerLine = 76 / 4;
0233 
0234     // detect when the caller doesn't adhere to our rules:
0235     if (mInsideFinishing) {
0236         return true;
0237     }
0238 
0239     while (scursor != send && dcursor != dend) {
0240         // properly empty the output buffer before starting something new:
0241         // ### fixme: we can optimize this away, since the buffer isn't
0242         // written to anyway (most of the time)
0243         if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) {
0244             return scursor == send;
0245         }
0246 
0247         uchar ch = *scursor++;
0248         // mNextbits   // (part of) value of next sextet
0249 
0250         // check for line length;
0251         if (mStepNo == 0 && mWrittenPacketsOnThisLine >= maxPacketsPerLine) {
0252             writeCRLF(dcursor, dend);
0253             mWrittenPacketsOnThisLine = 0;
0254         }
0255 
0256         // depending on mStepNo, extract value and mNextbits from the
0257         // octet stream:
0258         switch (mStepNo) {
0259         case 0:
0260             assert(mNextbits == 0);
0261             writeBase64(ch >> 2, dcursor, dend); // top-most 6 bits -> output
0262             mNextbits = (ch & 0x3) << 4; // 0..1 bits -> 4..5 in mNextbits
0263             break;
0264         case 1:
0265             assert((mNextbits & ~0x30) == 0);
0266             writeBase64(mNextbits | ch >> 4, dcursor, dend); // 4..7 bits -> 0..3 in value
0267             mNextbits = (ch & 0xf) << 2; // 0..3 bits -> 2..5 in mNextbits
0268             break;
0269         case 2:
0270             assert((mNextbits & ~0x3C) == 0);
0271             writeBase64(mNextbits | ch >> 6, dcursor, dend); // 6..7 bits -> 0..1 in value
0272             writeBase64(ch & 0x3F, dcursor, dend); // 0..5 bits -> output
0273             mNextbits = 0;
0274             mWrittenPacketsOnThisLine++;
0275             break;
0276         default:
0277             assert(0);
0278         }
0279         mStepNo = (mStepNo + 1) % 3;
0280     }
0281 
0282     if (d->outputBufferCursor) {
0283         flushOutputBuffer(dcursor, dend);
0284     }
0285 
0286     return scursor == send;
0287 }
0288 
0289 bool Rfc2047BEncodingEncoder::encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend)
0290 {
0291     // detect when the caller doesn't adhere to our rules:
0292     if (mInsideFinishing) {
0293         return true;
0294     }
0295 
0296     while (scursor != send && dcursor != dend) {
0297         // properly empty the output buffer before starting something new:
0298         // ### fixme: we can optimize this away, since the buffer isn't
0299         // written to anyway (most of the time)
0300         if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) {
0301             return scursor == send;
0302         }
0303 
0304         uchar ch = *scursor++;
0305         // mNextbits   // (part of) value of next sextet
0306 
0307         // depending on mStepNo, extract value and mNextbits from the
0308         // octet stream:
0309         switch (mStepNo) {
0310         case 0:
0311             assert(mNextbits == 0);
0312             writeBase64(ch >> 2, dcursor, dend); // top-most 6 bits -> output
0313             mNextbits = (ch & 0x3) << 4; // 0..1 bits -> 4..5 in mNextbits
0314             break;
0315         case 1:
0316             assert((mNextbits & ~0x30) == 0);
0317             writeBase64(mNextbits | ch >> 4, dcursor, dend); // 4..7 bits -> 0..3 in value
0318             mNextbits = (ch & 0xf) << 2; // 0..3 bits -> 2..5 in mNextbits
0319             break;
0320         case 2:
0321             assert((mNextbits & ~0x3C) == 0);
0322             writeBase64(mNextbits | ch >> 6, dcursor, dend); // 6..7 bits -> 0..1 in value
0323             writeBase64(ch & 0x3F, dcursor, dend); // 0..5 bits -> output
0324             mNextbits = 0;
0325             break;
0326         default:
0327             assert(0);
0328         }
0329         mStepNo = (mStepNo + 1) % 3;
0330     }
0331 
0332     if (d->outputBufferCursor) {
0333         flushOutputBuffer(dcursor, dend);
0334     }
0335 
0336     return scursor == send;
0337 }
0338 
0339 bool Base64Encoder::finish(char *&dcursor, const char *const dend)
0340 {
0341     return generic_finish(dcursor, dend, true);
0342 }
0343 
0344 bool Rfc2047BEncodingEncoder::finish(char *&dcursor, const char *const dend)
0345 {
0346     return generic_finish(dcursor, dend, false);
0347 }
0348 
0349 bool Base64Encoder::generic_finish(char *&dcursor, const char *const dend, bool withLFatEnd)
0350 {
0351     if (mInsideFinishing) {
0352         return flushOutputBuffer(dcursor, dend);
0353     }
0354 
0355     if (d->outputBufferCursor && !flushOutputBuffer(dcursor, dend)) {
0356         return false;
0357     }
0358 
0359     mInsideFinishing = true;
0360 
0361     //
0362     // writing out the last mNextbits...
0363     //
0364     switch (mStepNo) {
0365     case 1: // 2 mNextbits waiting to be written. Needs two padding chars:
0366     case 2: // 4 or 6 mNextbits waiting to be written. Completes a block
0367         writeBase64(mNextbits, dcursor, dend);
0368         mNextbits = 0;
0369         break;
0370     case 0: // no padding, nothing to be written, except possibly the CRLF
0371         assert(mNextbits == 0);
0372         break;
0373     default:
0374         assert(0);
0375     }
0376 
0377     //
0378     // adding padding...
0379     //
0380     switch (mStepNo) {
0381     case 1:
0382         write('=', dcursor, dend);
0383         Q_FALLTHROUGH();
0384     // fall through:
0385     case 2:
0386         write('=', dcursor, dend);
0387         Q_FALLTHROUGH();
0388     // fall through:
0389     case 0: // completed an quartet - add CRLF
0390         if (withLFatEnd) {
0391             writeCRLF(dcursor, dend);
0392         }
0393         return flushOutputBuffer(dcursor, dend);
0394     default:
0395         assert(0);
0396     }
0397     return true; // asserts get compiled out
0398 }
0399 
0400 } // namespace KCodecs