File indexing completed on 2024-03-24 17:21:31

0001 /*
0002  *  SPDX-FileCopyrightText: 2019  Andreas Cord-Landwehr <cordlandwehr@kde.org>
0003  *
0004  *  This program is free software; you can redistribute it and/or
0005  *  modify it under the terms of the GNU General Public License as
0006  *  published by the Free Software Foundation; either version 2 of
0007  *  the License or (at your option) version 3 or any later version
0008  *  accepted by the membership of KDE e.V. (or its successor approved
0009  *  by the membership of KDE e.V.), which shall act as a proxy
0010  *  defined in Section 14 of version 3 of the license.
0011  *
0012  *  This program is distributed in the hope that it will be useful,
0013  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
0014  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0015  *  GNU General Public License for more details.
0016  *
0017  *  You should have received a copy of the GNU General Public License
0018  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
0019  */
0020 
0021 #include "licenseregistry.h"
0022 #include <QDebug>
0023 #include <QDir>
0024 #include <QDirIterator>
0025 
0026 const QString LicenseRegistry::ToClarifyLicense("TO-CLARIFY");
0027 const QString LicenseRegistry::UnknownLicense("UNKNOWN-LICENSE");
0028 const QString LicenseRegistry::AmbigiousLicense("AMBIGIOUS");
0029 const QString LicenseRegistry::MissingLicense("MISSING-LICENSE");
0030 const QString LicenseRegistry::MissingLicenseForGeneratedFile("MISSING-LICENSE-GENERATED-FILE");
0031 
0032 LicenseRegistry::LicenseRegistry(QObject *parent)
0033     : QObject(parent)
0034 {
0035     loadLicenseHeaders();
0036     loadLicenseFiles();
0037 }
0038 
0039 void LicenseRegistry::loadLicenseHeaders()
0040 {
0041     if (!m_registry.isEmpty()) {
0042         m_registry.clear();
0043     }
0044     m_registry[LicenseRegistry::UnknownLicense] = QVector<QString> {"THIS IS A STUB HEADER FOR UNKNOWN LICENSES, IT SHALL NEVER MATCH"};
0045 
0046     QDirIterator spdxIter(":/licenses_templates/");
0047     while (spdxIter.hasNext()) {
0048         QString filePath = spdxIter.next();
0049         if (!spdxIter.fileInfo().isDir()) {
0050             qWarning() << "A non-directory was found here unexpected:" << spdxIter.fileInfo();
0051             continue;
0052         }
0053         QVector<QString> headerTexts;
0054         QDirIterator headerIter(filePath);
0055         while (headerIter.hasNext()) {
0056             QFile file(headerIter.next());
0057             file.open(QIODevice::ReadOnly);
0058             headerTexts.append(file.readAll());
0059         }
0060         // sort license texts lexicographically decreasing
0061         // this is a simple solution for the problem when one license text is a prefix of another license text
0062         // which is known for license texts with omitted "." at the end
0063         std::sort(headerTexts.begin(), headerTexts.end(), [](const QString &lhs, const QString &rhs) { return lhs > rhs; });
0064         m_registry[spdxIter.fileName()] = headerTexts;
0065     }
0066 }
0067 
0068 void LicenseRegistry::loadLicenseFiles()
0069 {
0070     QDirIterator textIter(":/licensetexts/");
0071     while (textIter.hasNext()) {
0072         QString filePath = textIter.next();
0073         if (textIter.fileInfo().isDir()) {
0074             qWarning() << "Unexpected directory found:" << textIter.fileInfo();
0075             continue;
0076         }
0077         QString baseName = textIter.fileName().mid(0, textIter.fileName().length() - 4); // remove ".txt"
0078         m_licenseFiles.insert(baseName, textIter.filePath());
0079     }
0080 }
0081 
0082 QVector<LicenseRegistry::SpdxExpression> LicenseRegistry::expressions() const
0083 {
0084     return m_registry.keys().toVector();
0085 }
0086 
0087 QVector<LicenseRegistry::SpdxIdentifier> LicenseRegistry::identifiers() const
0088 {
0089     return m_licenseFiles.keys().toVector();
0090 }
0091 
0092 QMap<LicenseRegistry::SpdxIdentifier, QString> LicenseRegistry::licenseFiles() const
0093 {
0094     return m_licenseFiles;
0095 }
0096 
0097 QVector<QString> LicenseRegistry::headerTexts(const LicenseRegistry::SpdxExpression &identifier) const
0098 {
0099     return m_registry.value(identifier);
0100 }
0101 
0102 QVector<QRegularExpression> LicenseRegistry::headerTextRegExps(const SpdxExpression &identifier) const
0103 {
0104     if (!m_registry.contains(identifier)) {
0105         qCritical() << identifier << "identifier not found, returning error matcher";
0106         return QVector<QRegularExpression> {QRegularExpression("DOES_NOT_MATCH_ANY_LICENSE_HEADER")};
0107     }
0108     if (m_regexpsCache.contains(identifier)) {
0109         return m_regexpsCache.value(identifier);
0110     }
0111 
0112     QVector<QString> patterns;
0113     // additional to all headers also add the SPDX identifier
0114     for (const QString &header : m_registry.value(identifier)) {
0115         QString pattern(QRegularExpression::escape(header));
0116         // start detection at first word of license string to make detection easier
0117         pattern.replace("\\\n", "[#\\* \\/-]*\\\n[#\\* \\t\\/-]*"); // allow prefixes and suffixes of whitespace mixed with stars or -
0118 
0119         // remove line-break pattern from last line
0120         pattern = pattern.left(pattern.length() - QString("[\\* \\/]*\\\n[\\* \\/]*").length());
0121         pattern.append("[\\* ]*");
0122 
0123         patterns.append(pattern);
0124     }
0125 
0126     QVector<QString>::const_iterator iter = patterns.constBegin();
0127     QString fullPattern = QString("(%1)").arg(*iter);
0128     QVector<QRegularExpression> regexps;
0129     QRegularExpression testExpr;
0130 
0131     while (++iter != patterns.constEnd()) {
0132         QString patternItem = QString("|(%1)").arg(*iter);
0133         testExpr.setPattern(fullPattern + patternItem);
0134 
0135         if (!testExpr.isValid()) {
0136             regexps += QRegularExpression(fullPattern);
0137             fullPattern = QString("(%1)").arg(*iter);
0138         } else {
0139             fullPattern.append(patternItem);
0140         }
0141     }
0142 
0143     regexps += QRegularExpression(fullPattern);
0144     m_regexpsCache[identifier] = regexps;
0145     return regexps;
0146 }
0147 
0148 bool LicenseRegistry::isFakeLicenseMarker(const QString &expression) const
0149 {
0150     const QStringList fakeExpressions {LicenseRegistry::ToClarifyLicense, LicenseRegistry::UnknownLicense, LicenseRegistry::MissingLicense, LicenseRegistry::AmbigiousLicense, LicenseRegistry::MissingLicenseForGeneratedFile};
0151     return fakeExpressions.contains(expression);
0152 }