kcodecs/src/kcodecsuuencode.cpp

0001 /*  -*- c++ -*-
0002     SPDX-FileCopyrightText: 2002 Marc Mutz <mutz@kde.org>
0003
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 /**
0007   @file
0008   This file is part of the API for handling @ref MIME data and
0009   defines a @ref uuencode @ref Codec class.
0010
0011   @brief
0012   Defines the UUCodec class.
0013
0014   @authors Marc Mutz \<mutz@kde.org\>
0015 */
0016
0017 #include "kcodecsuuencode.h"
0018
0019 #include <QDebug>
0020
0021 #include <cassert>
0022
0023 using namespace KCodecs;
0024
0025 namespace KCodecs
0026 {
0027 class UUDecoder : public Decoder
0028 {
0029     uint mStepNo;
0030     uchar mAnnouncedOctetCount; // (on current line)
0031     uchar mCurrentOctetCount; // (on current line)
0032     uchar mOutbits;
0033     bool mLastWasCRLF : 1;
0034     bool mSawBegin : 1; // whether we already saw ^begin...
0035     uint mIntoBeginLine : 3; // count #chars we compared against "begin" 0..5
0036     bool mSawEnd : 1; // whether we already saw ^end...
0037     uint mIntoEndLine : 2; // count #chars we compared against "end" 0..3
0038
0039     void searchForBegin(const char *&scursor, const char *const send);
0040
0041 protected:
0042     friend class UUCodec;
0043     UUDecoder(Codec::NewlineType newline = Codec::NewlineLF)
0044         : Decoder(newline)
0045         , mStepNo(0)
0046         , mAnnouncedOctetCount(0)
0047         , mCurrentOctetCount(0)
0048         , mOutbits(0)
0049         , mLastWasCRLF(true)
0050         , mSawBegin(false)
0051         , mIntoBeginLine(0)
0052         , mSawEnd(false)
0053         , mIntoEndLine(0)
0054     {
0055     }
0056
0057 public:
0058     ~UUDecoder() override
0059     {
0060     }
0061
0062     bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) override;
0063     // ### really needs no finishing???
0064     bool finish(char *&dcursor, const char *const dend) override
0065     {
0066         Q_UNUSED(dcursor);
0067         Q_UNUSED(dend);
0068         return true;
0069     }
0070 };
0071
0072 Encoder *UUCodec::makeEncoder(NewlineType newline) const
0073 {
0074     Q_UNUSED(newline)
0075     return nullptr; // encoding not supported
0076 }
0077
0078 Decoder *UUCodec::makeDecoder(NewlineType newline) const
0079 {
0080     return new UUDecoder(newline);
0081 }
0082
0083 /********************************************************/
0084 /********************************************************/
0085 /********************************************************/
0086
0087 void UUDecoder::searchForBegin(const char *&scursor, const char *const send)
0088 {
0089     static const char begin[] = "begin\n";
0090     static const uint beginLength = 5; // sic!
0091
0092     assert(!mSawBegin || mIntoBeginLine > 0);
0093
0094     while (scursor != send) {
0095         uchar ch = *scursor++;
0096         if (ch == begin[mIntoBeginLine]) {
0097             if (mIntoBeginLine < beginLength) {
0098                 // found another char
0099                 ++mIntoBeginLine;
0100                 if (mIntoBeginLine == beginLength) {
0101                     mSawBegin = true; // "begin" complete, now search the next \n...
0102                 }
0103             } else { // mIntoBeginLine == beginLength
0104                 // found '\n': begin line complete
0105                 mLastWasCRLF = true;
0106                 mIntoBeginLine = 0;
0107                 return;
0108             }
0109         } else if (mSawBegin) {
0110             // OK, skip stuff until the next \n
0111         } else {
0112             // qWarning() << "UUDecoder: garbage before \"begin\", resetting parser";
0113             mIntoBeginLine = 0;
0114         }
0115     }
0116 }
0117
0118 // uuencoding just shifts all 6-bit octets by 32 (SP/' '), except NUL,
0119 // which gets mapped to 0x60
0120 static inline uchar uuDecode(uchar c)
0121 {
0122     return (c - ' ') // undo shift and
0123         & 0x3F; // map 0x40 (0x60-' ') to 0...
0124 }
0125
0126 bool UUDecoder::decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend)
0127 {
0128     // First, check whether we still need to find the "begin" line:
0129     if (!mSawBegin || mIntoBeginLine != 0) {
0130         searchForBegin(scursor, send);
0131     } else if (mSawEnd) {
0132         // or if we are past the end line:
0133         scursor = send; // do nothing anymore...
0134         return true;
0135     }
0136
0137     while (dcursor != dend && scursor != send) {
0138         uchar ch = *scursor++;
0139         uchar value;
0140
0141         // Check whether we need to look for the "end" line:
0142         if (mIntoEndLine > 0) {
0143             static const char end[] = "end";
0144             static const uint endLength = 3;
0145
0146             if (ch == end[mIntoEndLine]) {
0147                 ++mIntoEndLine;
0148                 if (mIntoEndLine == endLength) {
0149                     mSawEnd = true;
0150                     scursor = send; // shortcut to the end
0151                     return true;
0152                 }
0153                 continue;
0154             } else {
0155                 // qWarning() << "UUDecoder: invalid line octet count looks like \"end\" (mIntoEndLine ="
0156                 //           << mIntoEndLine << ")!";
0157                 mIntoEndLine = 0;
0158                 // fall through...
0159             }
0160         }
0161
0162         // Normal parsing:
0163
0164         // The first char of a line is an encoding of the length of the
0165         // current line. We simply ignore it:
0166         if (mLastWasCRLF) {
0167             // reset char-per-line counter:
0168             mLastWasCRLF = false;
0169             mCurrentOctetCount = 0;
0170
0171             // try to decode the chars-on-this-line announcement:
0172             if (ch == 'e') { // maybe the beginning of the "end"? ;-)
0173                 mIntoEndLine = 1;
0174             } else if (ch > 0x60) {
0175                 // ### invalid line length char: what shall we do??
0176             } else if (ch > ' ') {
0177                 mAnnouncedOctetCount = uuDecode(ch);
0178             } else if (ch == '\n') {
0179                 mLastWasCRLF = true; // oops, empty line
0180             }
0181
0182             continue;
0183         }
0184
0185         // try converting ch to a 6-bit value:
0186         if (ch > 0x60) {
0187             continue; // invalid char
0188         } else if (ch > ' ') {
0189             value = uuDecode(ch);
0190         } else if (ch == '\n') { // line end
0191             mLastWasCRLF = true;
0192             continue;
0193         } else {
0194             continue;
0195         }
0196
0197         // add the new bits to the output stream and flush full octets:
0198         switch (mStepNo) {
0199         case 0:
0200             mOutbits = value << 2;
0201             break;
0202         case 1:
0203             if (mCurrentOctetCount < mAnnouncedOctetCount) {
0204                 *dcursor++ = (char)(mOutbits | value >> 4);
0205             }
0206             ++mCurrentOctetCount;
0207             mOutbits = value << 4;
0208             break;
0209         case 2:
0210             if (mCurrentOctetCount < mAnnouncedOctetCount) {
0211                 *dcursor++ = (char)(mOutbits | value >> 2);
0212             }
0213             ++mCurrentOctetCount;
0214             mOutbits = value << 6;
0215             break;
0216         case 3:
0217             if (mCurrentOctetCount < mAnnouncedOctetCount) {
0218                 *dcursor++ = (char)(mOutbits | value);
0219             }
0220             ++mCurrentOctetCount;
0221             mOutbits = 0;
0222             break;
0223         default:
0224             assert(0);
0225         }
0226         mStepNo = (mStepNo + 1) % 4;
0227
0228         // check whether we ran over the announced octet count for this line:
0229         if (mCurrentOctetCount == mAnnouncedOctetCount + 1) {
0230             // qWarning()
0231             //         << "UUDecoder: mismatch between announced ("
0232             //         << mAnnouncedOctetCount << ") and actual line octet count!";
0233         }
0234     }
0235
0236     // return false when caller should call us again:
0237     return scursor == send;
0238 } // UUDecoder::decode()
0239
0240 } // namespace KCodecs