// File indexing completed on 2024-04-21 16:29:08
0001 /* 0002 * SPDX-FileCopyrightText: 2019 Andreas Cord-Landwehr <cordlandwehr@kde.org> 0003 * 0004 * SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL 0005 */ 0006 0007 #include "directoryparser.h" 0008 #include "skipparser.h" 0009 #include <QDebug> 0010 #include <QDirIterator> 0011 #include <QTextStream> 0012 #include <QVector> 0013 0014 const QStringList DirectoryParser::s_supportedExtensions = {".cpp", ".cc", ".c", ".h", ".css", ".hpp", ".qml", ".cmake", "CMakeLists.txt", ".in", ".py", ".frag", ".vert", 0015 ".glsl", "php", "sh", ".mm", ".java", ".kt", ".js", ".xml", ".xsd", ".xsl", ".pl", ".rb", ".docbook"}; 0016 0017 bool shallIgnoreFile(const QDirIterator &iterator, const QRegularExpression &fileToIgnorePattern) 0018 { 0019 QFileInfo fileInfo(iterator.fileInfo()); 0020 return !fileInfo.isFile() or (!fileToIgnorePattern.pattern().isEmpty() && fileToIgnorePattern.match(fileInfo.filePath()).hasMatch()); 0021 } 0022 0023 void DirectoryParser::setLicenseHeaderParser(LicenseParser parser) 0024 { 0025 m_parserType = parser; 0026 } 0027 0028 QRegularExpression DirectoryParser::spdxStatementRegExp() const 0029 { 0030 static auto regexp = QRegularExpression("(SPDX-License-Identifier: (?<expression>(.*)))"); 0031 return regexp; 0032 } 0033 0034 QRegularExpression DirectoryParser::copyrightRegExp() const 0035 { 0036 static auto regexp = QRegularExpression( 0037 "(?<!\")" // negative lookahead for quotation marks, to skip string statements 0038 "(SPDX-FileCopyrightText:|[cC]opyright(\\s*:?\\s+\\([cC]\\))|(?<![cC]opyright )\\([cC]\\)|[cC]opyright\\s+©|(?<![cC]opyright )©|[cC]opyright(\\s*:)?)" 0039 "[, ]+" 0040 "(?<years>([0-9]+(-[0-9]+| - [0-9]+| to [0-9]+|,[ ]?[0-9]+)*|%{CURRENT_YEAR}))" 0041 "[, ]+" 0042 "([bB]y[ ]+)?" 
0043 "(?<name>([\u00C0-\u017Fa-zA-Z\\-\\.]+( [\u00C0-\u017Fa-zA-Z\\-\\.]+)*|%{AUTHOR}))" 0044 "[, ]*" 0045 "(?<contact>.*|%{EMAIL})"); 0046 return regexp; 0047 } 0048 0049 QString DirectoryParser::cleanupSpaceInCopyrightYearList(const QString &originalYearText) const 0050 { 0051 QString cleanedYearText = originalYearText; 0052 0053 static auto missingWhitespaceAfterCommaRegex = QRegularExpression(QStringLiteral(",(?=[0-9])")); 0054 static auto unneededWhitespaceAroundRangeRegex = QRegularExpression(QStringLiteral(" - (?=[0-9])")); 0055 static auto writtenRangeStatementRegex = QRegularExpression(QStringLiteral(" to (?=[0-9])")); 0056 0057 cleanedYearText.replace(missingWhitespaceAfterCommaRegex, QStringLiteral(", ")); 0058 cleanedYearText.replace(unneededWhitespaceAroundRangeRegex, QStringLiteral("-")); 0059 cleanedYearText.replace(writtenRangeStatementRegex, QStringLiteral("-")); 0060 0061 return cleanedYearText; 0062 } 0063 0064 QString DirectoryParser::unifyCopyrightStatements(const QString &originalText) const 0065 { 0066 QString header = originalText; 0067 QRegularExpression regExp = copyrightRegExp(); 0068 auto match = regExp.match(header); 0069 0070 while (match.hasMatch()) { 0071 QString years = match.captured("years"); 0072 years = cleanupSpaceInCopyrightYearList(years); 0073 QString name = match.captured("name"); 0074 QString contact = match.captured("contact"); 0075 0076 QString unifiedCopyright = QString("SPDX-FileCopyrightText: %1 %2 %3").arg(years).arg(name).arg(contact).trimmed(); 0077 header.replace(match.capturedStart(), match.capturedLength(), unifiedCopyright); 0078 match = regExp.match(header, match.capturedStart() + unifiedCopyright.length()); 0079 } 0080 return header; 0081 } 0082 0083 QString DirectoryParser::unifyCopyrightCommentHeader(const QString &originalText) const 0084 { 0085 // restrict conversion to top-file comments 0086 if (!originalText.startsWith("/*")) { 0087 qWarning() << "\tFile not starting with a comment."; 0088 return 
originalText; 0089 } 0090 auto lines = originalText.split("\n"); 0091 for (int i = 0; i < lines.size(); ++i) { 0092 lines[i].replace(QRegularExpression("/(\\*)+"), "/*"); // initial comment line 0093 if (lines[i].startsWith("/*")) { 0094 continue; // do not further modify first line 0095 } 0096 lines[i].replace(QRegularExpression("[ ]*(\\*)+/"), "*/"); // final comment line 0097 if (lines[i].startsWith("*/")) { 0098 break; 0099 } 0100 // invariant: the following line is guaranteed to be a port of multiline comment 0101 lines[i].replace(QRegularExpression("^[ \\*]+(?!(\\\\))"), " "); // in-between line 0102 lines[i].replace(QRegularExpression("[ \\*]+$"), ""); // in-between line 0103 } 0104 QString text = lines.join("\n"); 0105 //qDebug() << text; 0106 return text; 0107 } 0108 0109 QString DirectoryParser::replaceHeaderText(const QString &fileContent, const QString &spdxExpression) const 0110 { 0111 auto regexps = m_registry.headerTextRegExps(spdxExpression); 0112 QString outputExpression = spdxExpression; 0113 outputExpression.replace('_', ' '); 0114 QString spdxOutputString = "SPDX-License-Identifier: " + outputExpression; 0115 QString newContent = fileContent; 0116 0117 // replace by longest match 0118 QRegularExpression bestMatchingExpr = regexps.first(); 0119 int bestCapturedLength = 0; 0120 for (auto regexp : regexps) { 0121 QRegularExpressionMatch match; 0122 if (newContent.contains(regexp, &match) && match.capturedLength() > bestCapturedLength) { 0123 bestMatchingExpr = regexp; 0124 bestCapturedLength = match.capturedLength(); 0125 } 0126 } 0127 newContent.replace(bestMatchingExpr, spdxOutputString); 0128 return newContent; 0129 } 0130 0131 LicenseRegistry::SpdxExpression DirectoryParser::detectSpdxLicenseStatement(const QString &fileContent) const 0132 { 0133 QRegularExpression regExp = spdxStatementRegExp(); 0134 auto match = regExp.match(fileContent); 0135 if (match.hasMatch()) { 0136 // TODO this very simple solution only works for SPDX expressions in 
our database 0137 // should be made more general 0138 return match.captured("expression").replace(' ', '_'); 0139 } 0140 return QString(); 0141 } 0142 0143 QVector<LicenseRegistry::SpdxExpression> DirectoryParser::pruneLicenseList(const QVector<LicenseRegistry::SpdxExpression> &inputLicenses) const 0144 { 0145 // TODO 0146 // - handle AND combinations 0147 // - handle complex OR combinations 0148 // - revisit operator preference order in SPDX and implement it here 0149 0150 if (inputLicenses.length() == 1) { 0151 return inputLicenses; 0152 } 0153 0154 auto licenses = inputLicenses; 0155 std::sort(licenses.begin(), licenses.end()); 0156 0157 // pruning step: remove duplicates 0158 licenses.erase(std::unique(licenses.begin(), licenses.end()), licenses.end()); 0159 0160 // pruning step: compute which licenses are supported with SPDX expression (splitting at "OR") 0161 // TODO this a very simple initial version and only works yet with simple license statements, not with multiple OR combinations 0162 QMap<LicenseRegistry::SpdxExpression, QVector<LicenseRegistry::SpdxExpression>> licenseClosure; 0163 for (const auto &license : qAsConst(licenses)) { 0164 QVector<LicenseRegistry::SpdxExpression> licenseChoice = license.split("_OR_").toVector(); 0165 0166 // remove all "WITH" statements 0167 for (int i = 0; i < licenseChoice.size(); ++i) { 0168 licenseChoice[i].remove(QRegularExpression("_WITH.*")); 0169 } 0170 licenseClosure[license] = licenseChoice; 0171 } 0172 QMutableVectorIterator<LicenseRegistry::SpdxExpression> iter(licenses); 0173 while (iter.hasNext()) { 0174 bool licensedContainedInClosure {false}; 0175 LicenseRegistry::SpdxExpression expression = iter.next(); 0176 for (auto iter = licenseClosure.begin(); iter != licenseClosure.end(); ++iter) { 0177 if (expression != iter.key() && iter.value().contains(expression)) { 0178 licensedContainedInClosure = true; 0179 } 0180 } 0181 if (licensedContainedInClosure) { 0182 iter.remove(); 0183 } 0184 } 0185 return licenses; 
0186 } 0187 0188 QVector<LicenseRegistry::SpdxExpression> DirectoryParser::detectLicenses(const QString &fileContent) const 0189 { 0190 switch (m_parserType) { 0191 case DirectoryParser::LicenseParser::REGEXP_PARSER: 0192 return detectLicensesRegexpParser(fileContent); 0193 case DirectoryParser::LicenseParser::SKIP_PARSER: 0194 return detectLicensesSkipParser(fileContent); 0195 } 0196 return {}; 0197 } 0198 0199 QVector<LicenseRegistry::SpdxExpression> DirectoryParser::detectLicensesSkipParser(const QString &fileContent) const 0200 { 0201 SkipParser parser; 0202 QVector<LicenseRegistry::SpdxExpression> testExpressions = m_registry.expressions(); 0203 QVector<LicenseRegistry::SpdxExpression> detectedLicenses; 0204 for (auto expression : testExpressions) { 0205 auto match = parser.findMatch(fileContent, m_registry.headerTexts(expression)); 0206 if (match) { 0207 detectedLicenses << expression; 0208 } 0209 } 0210 LicenseRegistry::SpdxExpression spdxStatement = detectSpdxLicenseStatement(fileContent); 0211 if (!spdxStatement.isEmpty()) { 0212 detectedLicenses << spdxStatement; 0213 } 0214 return detectedLicenses; 0215 } 0216 0217 QVector<LicenseRegistry::SpdxExpression> DirectoryParser::detectLicensesRegexpParser(const QString &fileContent) const 0218 { 0219 QVector<LicenseRegistry::SpdxExpression> testExpressions = m_registry.expressions(); 0220 QVector<LicenseRegistry::SpdxExpression> detectedLicenses; 0221 for (auto expression : testExpressions) { 0222 auto regexps = m_registry.headerTextRegExps(expression); 0223 for (auto regexp : regexps) { 0224 if (fileContent.contains(regexp)) { 0225 detectedLicenses << expression; 0226 } 0227 } 0228 } 0229 LicenseRegistry::SpdxExpression spdxStatement = detectSpdxLicenseStatement(fileContent); 0230 if (!spdxStatement.isEmpty()) { 0231 detectedLicenses << spdxStatement; 0232 } 0233 return detectedLicenses; 0234 } 0235 0236 QMap<QString, LicenseRegistry::SpdxExpression> DirectoryParser::parseAll(const QString &directory, bool 
convertMode, const QString &ignorePattern) const 0237 { 0238 QVector<LicenseRegistry::SpdxExpression> expressions = m_registry.expressions(); 0239 QMap<QString, LicenseRegistry::SpdxExpression> results; 0240 0241 if (convertMode) { 0242 qInfo() << "Running parser in CONVERT mode: every found license will be replaced with SPDX identifiers"; 0243 } 0244 0245 QStringList missingLicenseHeaderBlacklist; 0246 { 0247 QFile file(":/annotations/missing-headers-blacklist.txt"); 0248 file.open(QIODevice::ReadOnly); 0249 QTextStream in(&file); 0250 QString line; 0251 while (in.readLineInto(&line)) { 0252 missingLicenseHeaderBlacklist.append(line); 0253 } 0254 } 0255 QStringList missingLicenseHeaderGeneratedFileBlacklist; 0256 { 0257 QFile file(":/annotations/generated-files.txt"); 0258 file.open(QIODevice::ReadOnly); 0259 QTextStream in(&file); 0260 QString line; 0261 while (in.readLineInto(&line)) { 0262 missingLicenseHeaderGeneratedFileBlacklist.append(line); 0263 } 0264 } 0265 0266 QRegularExpression ignoreFile(ignorePattern); 0267 0268 QDirIterator iterator(directory, QDirIterator::Subdirectories); 0269 while (iterator.hasNext()) { 0270 QFile file(iterator.next()); 0271 if (shallIgnoreFile(iterator, ignoreFile)) { 0272 continue; 0273 } 0274 0275 bool skip = true; 0276 for (const auto &ending : DirectoryParser::s_supportedExtensions) { 0277 if (file.fileName().endsWith(ending)) { 0278 skip = false; 0279 break; 0280 } 0281 } 0282 if (skip) { 0283 continue; 0284 } 0285 0286 file.open(QIODevice::ReadOnly); 0287 const QString fileContent = file.readAll(); 0288 file.close(); 0289 0290 // qDebug() << "checking:" << iterator.fileInfo(); 0291 QVector<LicenseRegistry::SpdxExpression> licenses = detectLicenses(fileContent); 0292 licenses = pruneLicenseList(licenses); 0293 0294 if (licenses.count() == 1) { 0295 results.insert(iterator.fileInfo().filePath(), licenses.first()); 0296 // qDebug() << "---> " << iterator.fileInfo().filePath() << identifier; 0297 } else if (licenses.count() 
> 1) { 0298 qCritical() << "UNHANDLED MULTI-LICENSE CASE" << iterator.fileInfo().filePath() << "-->" << licenses; 0299 results[iterator.fileInfo().filePath()] = LicenseRegistry::AmbigiousLicense; 0300 } else { 0301 // if nothing matches, report error 0302 results.insert(iterator.fileInfo().filePath(), LicenseRegistry::UnknownLicense); 0303 0304 // check for blacklisted file because of missing license header only when no license was detected 0305 for (auto backlistPath : missingLicenseHeaderBlacklist) { 0306 if (iterator.fileInfo().filePath().endsWith(backlistPath)) { 0307 results.insert(iterator.fileInfo().filePath(), LicenseRegistry::MissingLicense); 0308 break; 0309 } 0310 } 0311 for (auto backlistPath : missingLicenseHeaderGeneratedFileBlacklist) { 0312 if (iterator.fileInfo().filePath().endsWith(backlistPath)) { 0313 results.insert(iterator.fileInfo().filePath(), LicenseRegistry::MissingLicenseForGeneratedFile); 0314 break; 0315 } 0316 } 0317 } 0318 0319 const QString expression = results.value(iterator.fileInfo().filePath()); 0320 if (convertMode && !m_registry.isFakeLicenseMarker(expression)) { 0321 QString newContent = replaceHeaderText(fileContent, expression); 0322 // qDebug() << newContent; 0323 file.open(QIODevice::WriteOnly); 0324 file.write(newContent.toUtf8()); 0325 file.close(); 0326 } 0327 } 0328 0329 if (convertMode) { 0330 // compute needed licenses 0331 QSet<QString> identifiers; 0332 for (const auto &expression : results.values()) { 0333 auto expressionSplit = expression.split('_'); 0334 for (const auto &identifier : expressionSplit) { 0335 // remove SPDX syntax attributes 0336 if (identifier == "OR" || identifier == "AND" || identifier == "WITH") { 0337 continue; 0338 } 0339 // remove special placeholders 0340 if (m_registry.isFakeLicenseMarker(identifier)) { 0341 continue; 0342 } 0343 identifiers.insert(identifier); 0344 } 0345 } 0346 // create licenses directory and put license files therein 0347 QString licenseDir = directory + "/LICENSES/"; 
0348 QDir().mkdir(licenseDir); 0349 const auto licenseFiles = m_registry.licenseFiles(); 0350 for (const auto &identifier : identifiers) { 0351 qDebug() << "Deploy license file" << identifier << licenseFiles.value(identifier); 0352 QFile::copy(licenseFiles.value(identifier), licenseDir + identifier + ".txt"); 0353 } 0354 } 0355 0356 return results; 0357 } 0358 0359 void DirectoryParser::convertCopyright(const QString &directory, ConvertOptions options, const QString &ignorePattern) const 0360 { 0361 QRegularExpression ignoreFile(ignorePattern); 0362 0363 QDirIterator iterator(directory, QDirIterator::Subdirectories); 0364 while (iterator.hasNext()) { 0365 QFile file(iterator.next()); 0366 0367 qInfo() << "Processing file:" << file.fileName(); 0368 0369 if (shallIgnoreFile(iterator, ignoreFile)) { 0370 qInfo() << "\tAsked to be ignored, skipping."; 0371 continue; 0372 } 0373 bool skip = true; 0374 for (const auto &ending : DirectoryParser::s_supportedExtensions) { 0375 if (file.fileName().endsWith(ending)) { 0376 skip = false; 0377 break; 0378 } 0379 } 0380 if (skip) { 0381 qInfo() << "\tUnsupported extension, skipping."; 0382 continue; 0383 } 0384 0385 file.open(QIODevice::ReadOnly); 0386 QString content = file.readAll(); 0387 file.close(); 0388 if (options & ConvertOption::COPYRIGHT_TEXT) { 0389 content = unifyCopyrightStatements(content); 0390 } 0391 if (options & ConvertOption::PRETTY) { 0392 content = unifyCopyrightCommentHeader(content); 0393 } 0394 file.open(QIODevice::WriteOnly); 0395 file.write(content.toUtf8()); 0396 file.close(); 0397 } 0398 }