// Warning: file /pim/kmime/src/kmime_charfreq.cpp was not indexed, or was modified since the last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /* 0002 kmime_charfreq.cpp 0003 0004 KMime, the KDE Internet mail/usenet news message library. 0005 SPDX-FileCopyrightText: 2001-2002 Marc Mutz <mutz@kde.org> 0006 0007 SPDX-License-Identifier: LGPL-2.0-or-later 0008 */ 0009 0010 /** 0011 @file 0012 This file is part of the API for handling MIME data and 0013 defines the CharFreq class. 0014 0015 @brief 0016 Defines the CharFreq class. 0017 0018 @authors Marc Mutz \<mutz@kde.org\> 0019 */ 0020 0021 #include "kmime_charfreq.h" 0022 #include "kmime_debug.h" 0023 0024 using namespace KMime; 0025 0026 /** 0027 * Private class that helps to provide binary compatibility between releases. 0028 * @internal 0029 */ 0030 //@cond PRIVATE 0031 //class KMime::CharFreq::Private 0032 //{ 0033 // public: 0034 //}; 0035 //@endcond 0036 0037 CharFreq::CharFreq(const QByteArray &buf) 0038 : mNUL(0), 0039 mCTL(0), 0040 mCR(0), mLF(0), 0041 mCRLF(0), 0042 mPrintable(0), 0043 mEightBit(0), 0044 mTotal(0), 0045 mLineMin(0xffffffff), 0046 mLineMax(0) 0047 { 0048 if (!buf.isEmpty()) { 0049 count(buf.data(), buf.size()); 0050 } 0051 } 0052 0053 CharFreq::CharFreq(const char *buf, size_t len) 0054 : mNUL(0), 0055 mCTL(0), 0056 mCR(0), mLF(0), 0057 mCRLF(0), 0058 mPrintable(0), 0059 mEightBit(0), 0060 mTotal(0), 0061 mLineMin(0xffffffff), 0062 mLineMax(0) 0063 { 0064 if (buf && len > 0) { 0065 count(buf, len); 0066 } 0067 } 0068 0069 //@cond PRIVATE 0070 static inline bool isWS(char ch) 0071 { 0072 return (ch == '\t' || ch == ' '); 0073 } 0074 //@endcond 0075 0076 void CharFreq::count(const char *it, size_t len) 0077 { 0078 const char *end = it + len; 0079 uint currentLineLength = 0; 0080 // initialize the prevChar with LF so that From_ detection works w/o 0081 // special-casing: 0082 char prevChar = '\n'; 0083 char prevPrevChar = 0; 0084 0085 for (; it != end ; ++it) { 0086 ++currentLineLength; 0087 switch (*it) { 0088 case '\0': ++mNUL; break; 0089 case '\r': ++mCR; break; 0090 case '\n': ++mLF; 0091 if (prevChar == '\r') { 0092 
--currentLineLength; ++mCRLF; 0093 } 0094 if (currentLineLength >= mLineMax) { 0095 mLineMax = currentLineLength - 1; 0096 } 0097 if (currentLineLength <= mLineMin) { 0098 mLineMin = currentLineLength - 1; 0099 } 0100 if (!mTrailingWS) { 0101 if (isWS(prevChar) || 0102 (prevChar == '\r' && isWS(prevPrevChar))) { 0103 mTrailingWS = true; 0104 } 0105 } 0106 currentLineLength = 0; 0107 break; 0108 case 'F': // check for lines starting with From_ if not found already: 0109 if (!mLeadingFrom) { 0110 if (prevChar == '\n' && end - it >= 5 && 0111 !qstrncmp("From ", it, 5)) { 0112 mLeadingFrom = true; 0113 } 0114 } 0115 ++mPrintable; 0116 break; 0117 default: { 0118 uchar c = *it; 0119 if (c == '\t' || (c >= ' ' && c <= '~')) { 0120 ++mPrintable; 0121 } else if (c == 127 || c < ' ') { 0122 ++mCTL; 0123 } else { 0124 ++mEightBit; 0125 } 0126 } 0127 } 0128 prevPrevChar = prevChar; 0129 prevChar = *it; 0130 } 0131 0132 // consider the length of the last line 0133 if (currentLineLength >= mLineMax) { 0134 mLineMax = currentLineLength; 0135 } 0136 if (currentLineLength <= mLineMin) { 0137 mLineMin = currentLineLength; 0138 } 0139 0140 // check whether the last character is tab or space 0141 if (isWS(prevChar)) { 0142 mTrailingWS = true; 0143 } 0144 0145 mTotal = len; 0146 } 0147 0148 bool CharFreq::isEightBitData() const 0149 { 0150 return type() == EightBitData; 0151 } 0152 0153 bool CharFreq::isEightBitText() const 0154 { 0155 return type() == EightBitText; 0156 } 0157 0158 bool CharFreq::isSevenBitData() const 0159 { 0160 return type() == SevenBitData; 0161 } 0162 0163 bool CharFreq::isSevenBitText() const 0164 { 0165 return type() == SevenBitText; 0166 } 0167 0168 bool CharFreq::hasTrailingWhitespace() const 0169 { 0170 return mTrailingWS; 0171 } 0172 0173 bool CharFreq::hasLeadingFrom() const 0174 { 0175 return mLeadingFrom; 0176 } 0177 0178 CharFreq::Type CharFreq::type() const 0179 { 0180 #if 0 0181 qCDebug(KMIME_LOG)("Total: %d; NUL: %d; CTL: %d;\n" 0182 "CR: %d; LF: 
%d; CRLF: %d;\n" 0183 "lineMin: %d; lineMax: %d;\n" 0184 "printable: %d; eightBit: %d;\n" 0185 "trailing whitespace: %s;\n" 0186 "leading 'From ': %s;\n", 0187 total, NUL, CTL, CR, LF, CRLF, lineMin, lineMax, 0188 printable, eightBit, 0189 mTrailingWS ? "yes" : "no" , mLeadingFrom ? "yes" : "no"); 0190 #endif 0191 if (mNUL) { // must be binary 0192 return Binary; 0193 } 0194 0195 // doesn't contain NUL's: 0196 if (mEightBit) { 0197 if (mLineMax > 988) { 0198 return EightBitData; // not allowed in 8bit 0199 } 0200 if ((mLF != mCRLF && mCRLF > 0) || mCR != mCRLF || controlCodesRatio() > 0.2) { 0201 return EightBitData; 0202 } 0203 return EightBitText; 0204 } 0205 0206 // doesn't contain NUL's, nor 8bit chars: 0207 if (mLineMax > 988) { 0208 return SevenBitData; 0209 } 0210 if ((mLF != mCRLF && mCRLF > 0) || mCR != mCRLF || controlCodesRatio() > 0.2) { 0211 return SevenBitData; 0212 } 0213 0214 // no NUL, no 8bit chars, no excessive CTLs and no lines > 998 chars: 0215 return SevenBitText; 0216 } 0217 0218 float CharFreq::printableRatio() const 0219 { 0220 if (mTotal) { 0221 return float(mPrintable) / float(mTotal); 0222 } else { 0223 return 0; 0224 } 0225 } 0226 0227 float CharFreq::controlCodesRatio() const 0228 { 0229 if (mTotal) { 0230 return float(mCTL) / float(mTotal); 0231 } else { 0232 return 0; 0233 } 0234 } 0235