src/antispam/spamheaderanalyzer.cpp

0001 /*
0002     spamheaderanalyzer.cpp
0003
0004     This file is part of KMail, the KDE mail client.
0005     SPDX-FileCopyrightText: 2004 Patrick Audley <paudley@blackcat.ca>
0006     SPDX-FileCopyrightText: 2004 Ingo Kloecker <kloecker@kde.org>
0007
0008     SPDX-License-Identifier: GPL-2.0-or-later
0009 */
0010
0011 #include "spamheaderanalyzer.h"
0012 #include "antispamconfig.h"
0013 #include "messageviewer_debug.h"
0014
0015 #include <KMime/Headers>
0016 #include <KMime/KMimeMessage>
0017 #include <KMime/Message>
0018
0019 #include <QRegularExpression>
0020
0021 using namespace MessageViewer;
0022
0023 // static
0024 SpamScores SpamHeaderAnalyzer::getSpamScores(KMime::Message *message)
0025 {
0026     SpamScores scores;
0027     const SpamAgents agents = AntiSpamConfig::instance()->uniqueAgents();
0028     SpamAgents::const_iterator end(agents.constEnd());
0029     for (SpamAgents::const_iterator it = agents.constBegin(); it != end; ++it) {
0030         float score = -2.0;
0031
0032         SpamError spamError = noError;
0033
0034         // Skip bogus agents
0035         if ((*it).scoreType() == SpamAgentNone) {
0036             continue;
0037         }
0038
0039         // Do we have the needed score field for this agent?
0040         KMime::Headers::Base *header = message->headerByType((*it).header().constData());
0041         if (!header) {
0042             continue;
0043         }
0044
0045         const QString mField = header->asUnicodeString();
0046
0047         if (mField.isEmpty()) {
0048             continue;
0049         }
0050
0051         QString scoreString;
0052         bool scoreValid = false;
0053
0054         if ((*it).scoreType() != SpamAgentBool) {
0055             // Can we extract the score?
0056             QRegularExpression scorePattern = (*it).scorePattern();
0057             if (scorePattern.match(mField).hasMatch()) {
0058                 scoreString = scorePattern.match(mField).captured(1);
0059                 scoreValid = true;
0060             }
0061         } else {
0062             scoreValid = true;
0063         }
0064
0065         if (!scoreValid) {
0066             spamError = couldNotFindTheScoreField;
0067             qCDebug(MESSAGEVIEWER_LOG) << "Score could not be extracted from header '" << mField << "'";
0068         } else {
0069             bool floatValid = false;
0070             switch ((*it).scoreType()) {
0071             case SpamAgentNone:
0072                 spamError = errorExtractingAgentString;
0073                 break;
0074
0075             case SpamAgentBool:
0076                 if ((*it).scorePattern().match(mField).hasMatch()) {
0077                     score = 0.0;
0078                 } else {
0079                     score = 100.0;
0080                 }
0081                 break;
0082
0083             case SpamAgentFloat:
0084                 score = scoreString.toFloat(&floatValid);
0085                 if (!floatValid) {
0086                     spamError = couldNotConverScoreToFloat;
0087                     qCDebug(MESSAGEVIEWER_LOG) << "Score (" << scoreString << ") is no number";
0088                 } else {
0089                     score *= 100.0;
0090                 }
0091                 break;
0092
0093             case SpamAgentFloatLarge:
0094                 score = scoreString.toFloat(&floatValid);
0095                 if (!floatValid) {
0096                     spamError = couldNotConverScoreToFloat;
0097                     qCDebug(MESSAGEVIEWER_LOG) << "Score (" << scoreString << ") is no number";
0098                 }
0099                 break;
0100
0101             case SpamAgentAdjustedFloat:
0102                 score = scoreString.toFloat(&floatValid);
0103                 if (!floatValid) {
0104                     spamError = couldNotConverScoreToFloat;
0105                     qCDebug(MESSAGEVIEWER_LOG) << "Score (" << scoreString << ") is no number";
0106                     break;
0107                 }
0108
0109                 // Find the threshold value.
0110                 QString thresholdString;
0111                 const QRegularExpression thresholdPattern = (*it).thresholdPattern();
0112                 if (thresholdPattern.match(mField).hasMatch()) {
0113                     thresholdString = thresholdPattern.match(mField).captured(1);
0114                 } else {
0115                     spamError = couldNotFindTheThresholdField;
0116                     qCDebug(MESSAGEVIEWER_LOG) << "Threshold could not be extracted from header '" << mField << "'";
0117                     break;
0118                 }
0119                 const float threshold = thresholdString.toFloat(&floatValid);
0120                 if (!floatValid || (threshold <= 0.0)) {
0121                     spamError = couldNotConvertThresholdToFloatOrThresholdIsNegative;
0122                     qCDebug(MESSAGEVIEWER_LOG) << "Threshold (" << thresholdString << ") is no"
0123                                                << "number or is negative";
0124                     break;
0125                 }
0126
0127                 // Normalize the score. Anything below 0 means 0%, anything above
0128                 // threshold mean 100%. Values between 0 and threshold are mapped
0129                 // linearly to 0% - 100%.
0130                 if (score < 0.0) {
0131                     score = 0.0;
0132                 } else if (score > threshold) {
0133                     score = 100.0;
0134                 } else {
0135                     score = score / threshold * 100.0;
0136                 }
0137                 break;
0138             }
0139         }
0140         // Find the confidence
0141         float confidence = -2.0;
0142         QString confidenceString = QStringLiteral("-2.0");
0143         bool confidenceValid = false;
0144         // Do we have the needed confidence field for this agent?
0145         const QByteArray confidenceHeaderName = (*it).confidenceHeader();
0146         QString mCField;
0147         if (!confidenceHeaderName.isEmpty()) {
0148             KMime::Headers::Base *cHeader = message->headerByType(confidenceHeaderName.constData());
0149             if (cHeader) {
0150                 mCField = cHeader->asUnicodeString();
0151                 if (!mCField.isEmpty()) {
0152                     // Can we extract the confidence?
0153                     QRegularExpression cScorePattern = (*it).confidencePattern();
0154                     if (cScorePattern.match(mCField).hasMatch()) {
0155                         confidenceString = cScorePattern.match(mCField).captured(1);
0156                     }
0157                     confidence = confidenceString.toFloat(&confidenceValid);
0158                     if (!confidenceValid) {
0159                         spamError = couldNotConvertConfidenceToFloat;
0160                         qCDebug(MESSAGEVIEWER_LOG) << "Unable to convert confidence to float:" << confidenceString;
0161                     }
0162                 }
0163             }
0164         }
0165         scores.append(SpamScore((*it).name(), spamError, score, confidence * 100, mField, mCField));
0166     }
0167
0168     return scores;
0169 }