// File indexing completed on 2024-04-28 15:30:55
0001 /* 0002 SPDX-FileCopyrightText: 2022 Waqar Ahmed <waqar.17a@gmail.com> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 #include "kateindentdetecter.h" 0007 0008 KateIndentDetecter::KateIndentDetecter(KTextEditor::DocumentPrivate *doc) 0009 : m_doc(doc) 0010 { 0011 } 0012 0013 struct SpacesDiffResult { 0014 int spacesDiff = 0; 0015 bool looksLikeAlignment = false; 0016 }; 0017 0018 static SpacesDiffResult spacesDiff(const QString &a, int aLength, const QString &b, int bLength) 0019 { 0020 SpacesDiffResult result; 0021 result.spacesDiff = 0; 0022 result.looksLikeAlignment = false; 0023 0024 // This can go both ways (e.g.): 0025 // - a: "\t" 0026 // - b: "\t " 0027 // => This should count 1 tab and 4 spaces 0028 0029 int i = 0; 0030 0031 for (i = 0; i < aLength && i < bLength; i++) { 0032 const auto aCharCode = a.at(i); 0033 const auto bCharCode = b.at(i); 0034 0035 if (aCharCode != bCharCode) { 0036 break; 0037 } 0038 } 0039 0040 int aSpacesCnt = 0; 0041 int aTabsCount = 0; 0042 for (int j = i; j < aLength; j++) { 0043 const auto aCharCode = a.at(j); 0044 if (aCharCode == QLatin1Char(' ')) { 0045 aSpacesCnt++; 0046 } else { 0047 aTabsCount++; 0048 } 0049 } 0050 0051 int bSpacesCnt = 0; 0052 int bTabsCount = 0; 0053 for (int j = i; j < bLength; j++) { 0054 const auto bCharCode = b.at(j); 0055 if (bCharCode == QLatin1Char(' ')) { 0056 bSpacesCnt++; 0057 } else { 0058 bTabsCount++; 0059 } 0060 } 0061 0062 if (aSpacesCnt > 0 && aTabsCount > 0) { 0063 return result; 0064 } 0065 if (bSpacesCnt > 0 && bTabsCount > 0) { 0066 return result; 0067 } 0068 0069 const auto tabsDiff = std::abs(aTabsCount - bTabsCount); 0070 const auto spacesDiff = std::abs(aSpacesCnt - bSpacesCnt); 0071 0072 if (tabsDiff == 0) { 0073 // check if the indentation difference might be caused by alignment reasons 0074 // sometime folks like to align their code, but this should not be used as a hint 0075 result.spacesDiff = spacesDiff; 0076 0077 if (spacesDiff > 0 && 0 <= 
bSpacesCnt - 1 && bSpacesCnt - 1 < a.length() && bSpacesCnt < b.length()) { 0078 if (b.at(bSpacesCnt) != QLatin1Char(' ') && a.at(bSpacesCnt - 1) == QLatin1Char(' ')) { 0079 if (a.at(a.length() - 1) == QLatin1Char(',')) { 0080 // This looks like an alignment desire: e.g. 0081 // const a = b + c, 0082 // d = b - c; 0083 result.looksLikeAlignment = true; 0084 } 0085 } 0086 } 0087 return result; 0088 } 0089 if (spacesDiff % tabsDiff == 0) { 0090 result.spacesDiff = spacesDiff / tabsDiff; 0091 return result; 0092 } 0093 return result; 0094 } 0095 0096 KateIndentDetecter::Result KateIndentDetecter::detect(int defaultTabSize, bool defaultInsertSpaces) 0097 { 0098 // Look at most at the first 10k lines 0099 const int linesCount = std::min(m_doc->lines(), 10000); 0100 0101 int linesIndentedWithTabsCount = 0; // number of lines that contain at least one tab in indentation 0102 int linesIndentedWithSpacesCount = 0; // number of lines that contain only spaces in indentation 0103 0104 QString previousLineText; // content of latest line that contained non-whitespace chars 0105 int previousLineIndentation = 0; // index at which latest line contained the first non-whitespace char 0106 0107 constexpr int ALLOWED_TAB_SIZE_GUESSES[7] = {2, 4, 6, 8, 3, 5, 7}; // prefer even guesses for `tabSize`, limit to [2, 8]. 
0108 constexpr int MAX_ALLOWED_TAB_SIZE_GUESS = 8; // max(ALLOWED_TAB_SIZE_GUESSES) = 8 0109 0110 int spacesDiffCount[] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; // `tabSize` scores 0111 SpacesDiffResult tmp; 0112 0113 for (int lineNumber = 0; lineNumber < linesCount; lineNumber++) { 0114 const QString currentLineText = m_doc->line(lineNumber); 0115 const int currentLineLength = currentLineText.length(); 0116 0117 bool currentLineHasContent = false; // does `currentLineText` contain non-whitespace chars 0118 int currentLineIndentation = 0; // index at which `currentLineText` contains the first non-whitespace char 0119 int currentLineSpacesCount = 0; // count of spaces found in `currentLineText` indentation 0120 int currentLineTabsCount = 0; // count of tabs found in `currentLineText` indentation 0121 for (int j = 0, lenJ = currentLineLength; j < lenJ; j++) { 0122 const auto charCode = currentLineText.at(j); 0123 0124 if (charCode == QLatin1Char('\t')) { 0125 currentLineTabsCount++; 0126 } else if (charCode == QLatin1Char(' ')) { 0127 currentLineSpacesCount++; 0128 } else { 0129 // Hit non whitespace character on this line 0130 currentLineHasContent = true; 0131 currentLineIndentation = j; 0132 break; 0133 } 0134 } 0135 0136 // Ignore empty or only whitespace lines 0137 if (!currentLineHasContent) { 0138 continue; 0139 } 0140 0141 if (currentLineTabsCount > 0) { 0142 linesIndentedWithTabsCount++; 0143 } else if (currentLineSpacesCount > 1) { 0144 linesIndentedWithSpacesCount++; 0145 } 0146 0147 tmp = spacesDiff(previousLineText, previousLineIndentation, currentLineText, currentLineIndentation); 0148 0149 if (tmp.looksLikeAlignment) { 0150 // if defaultInsertSpaces === true && the spaces count == tabSize, we may want to count it as valid indentation 0151 // 0152 // - item1 0153 // - item2 0154 // 0155 // otherwise skip this line entirely 0156 // 0157 // const a = 1, 0158 // b = 2; 0159 0160 if (!(defaultInsertSpaces && defaultTabSize == tmp.spacesDiff)) { 0161 continue; 0162 } 
0163 } 0164 0165 const int currentSpacesDiff = tmp.spacesDiff; 0166 if (currentSpacesDiff <= MAX_ALLOWED_TAB_SIZE_GUESS) { 0167 spacesDiffCount[currentSpacesDiff]++; 0168 } 0169 0170 previousLineText = currentLineText; 0171 previousLineIndentation = currentLineIndentation; 0172 } 0173 0174 bool insertSpaces = defaultInsertSpaces; 0175 if (linesIndentedWithTabsCount != linesIndentedWithSpacesCount) { 0176 insertSpaces = (linesIndentedWithTabsCount < linesIndentedWithSpacesCount); 0177 } 0178 0179 int tabSize = defaultTabSize; 0180 0181 // Guess tabSize only if inserting spaces... 0182 if (insertSpaces) { 0183 int tabSizeScore = 0; 0184 for (int i = 0; i < 7; ++i) { 0185 int possibleTabSize = ALLOWED_TAB_SIZE_GUESSES[i]; 0186 const int possibleTabSizeScore = spacesDiffCount[possibleTabSize]; 0187 if (possibleTabSizeScore > tabSizeScore) { 0188 tabSizeScore = possibleTabSizeScore; 0189 tabSize = possibleTabSize; 0190 } 0191 } 0192 0193 // Let a tabSize of 2 win even if it is not the maximum 0194 // (only in case 4 was guessed) 0195 if (tabSize == 4 && spacesDiffCount[4] > 0 && spacesDiffCount[2] > 0 && spacesDiffCount[2] >= spacesDiffCount[4] / 2) { 0196 tabSize = 2; 0197 } 0198 0199 // If no indent detected, check if the file is 1 space indented 0200 if (tabSizeScore == 0) { 0201 const auto it = std::max_element(spacesDiffCount, spacesDiffCount + 9); 0202 const auto maxIdx = std::distance(spacesDiffCount, it); 0203 if (maxIdx == 1) { 0204 tabSize = 1; 0205 } 0206 } 0207 } 0208 0209 return {tabSize, insertSpaces}; 0210 }