File indexing completed on 2024-05-05 07:59:55
0001 /* 0002 SPDX-FileCopyrightText: 2022 Waqar Ahmed <waqar.17a@gmail.com> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 #include "kateindentdetecter.h" 0007 0008 #include "katedocument.h" 0009 0010 KateIndentDetecter::KateIndentDetecter(KTextEditor::DocumentPrivate *doc) 0011 : m_doc(doc) 0012 { 0013 } 0014 0015 struct SpacesDiffResult { 0016 int spacesDiff = 0; 0017 bool looksLikeAlignment = false; 0018 }; 0019 0020 static SpacesDiffResult spacesDiff(const QString &a, int aLength, const QString &b, int bLength) 0021 { 0022 SpacesDiffResult result; 0023 result.spacesDiff = 0; 0024 result.looksLikeAlignment = false; 0025 0026 // This can go both ways (e.g.): 0027 // - a: "\t" 0028 // - b: "\t " 0029 // => This should count 1 tab and 4 spaces 0030 0031 int i = 0; 0032 0033 for (i = 0; i < aLength && i < bLength; i++) { 0034 const auto aCharCode = a.at(i); 0035 const auto bCharCode = b.at(i); 0036 0037 if (aCharCode != bCharCode) { 0038 break; 0039 } 0040 } 0041 0042 int aSpacesCnt = 0; 0043 int aTabsCount = 0; 0044 for (int j = i; j < aLength; j++) { 0045 const auto aCharCode = a.at(j); 0046 if (aCharCode == QLatin1Char(' ')) { 0047 aSpacesCnt++; 0048 } else { 0049 aTabsCount++; 0050 } 0051 } 0052 0053 int bSpacesCnt = 0; 0054 int bTabsCount = 0; 0055 for (int j = i; j < bLength; j++) { 0056 const auto bCharCode = b.at(j); 0057 if (bCharCode == QLatin1Char(' ')) { 0058 bSpacesCnt++; 0059 } else { 0060 bTabsCount++; 0061 } 0062 } 0063 0064 if (aSpacesCnt > 0 && aTabsCount > 0) { 0065 return result; 0066 } 0067 if (bSpacesCnt > 0 && bTabsCount > 0) { 0068 return result; 0069 } 0070 0071 const auto tabsDiff = std::abs(aTabsCount - bTabsCount); 0072 const auto spacesDiff = std::abs(aSpacesCnt - bSpacesCnt); 0073 0074 if (tabsDiff == 0) { 0075 // check if the indentation difference might be caused by alignment reasons 0076 // sometime folks like to align their code, but this should not be used as a hint 0077 result.spacesDiff = spacesDiff; 0078 0079 if (spacesDiff > 0 && 0 <= bSpacesCnt - 1 && bSpacesCnt - 1 < a.length() && bSpacesCnt < b.length()) { 0080 if (b.at(bSpacesCnt) != QLatin1Char(' ') && a.at(bSpacesCnt - 1) == QLatin1Char(' ')) { 0081 if (a.at(a.length() - 1) == QLatin1Char(',')) { 0082 // This looks like an alignment desire: e.g. 0083 // const a = b + c, 0084 // d = b - c; 0085 result.looksLikeAlignment = true; 0086 } 0087 } 0088 } 0089 return result; 0090 } 0091 if (spacesDiff % tabsDiff == 0) { 0092 result.spacesDiff = spacesDiff / tabsDiff; 0093 return result; 0094 } 0095 return result; 0096 } 0097 0098 KateIndentDetecter::Result KateIndentDetecter::detect(int defaultTabSize, bool defaultInsertSpaces) 0099 { 0100 // Look at most at the first 10k lines 0101 const int linesCount = std::min(m_doc->lines(), 10000); 0102 0103 int linesIndentedWithTabsCount = 0; // number of lines that contain at least one tab in indentation 0104 int linesIndentedWithSpacesCount = 0; // number of lines that contain only spaces in indentation 0105 0106 QString previousLineText; // content of latest line that contained non-whitespace chars 0107 int previousLineIndentation = 0; // index at which latest line contained the first non-whitespace char 0108 0109 constexpr int ALLOWED_TAB_SIZE_GUESSES[7] = {2, 4, 6, 8, 3, 5, 7}; // prefer even guesses for `tabSize`, limit to [2, 8]. 0110 constexpr int MAX_ALLOWED_TAB_SIZE_GUESS = 8; // max(ALLOWED_TAB_SIZE_GUESSES) = 8 0111 0112 int spacesDiffCount[] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; // `tabSize` scores 0113 SpacesDiffResult tmp; 0114 0115 for (int lineNumber = 0; lineNumber < linesCount; lineNumber++) { 0116 const QString currentLineText = m_doc->line(lineNumber); 0117 const int currentLineLength = currentLineText.length(); 0118 0119 bool currentLineHasContent = false; // does `currentLineText` contain non-whitespace chars 0120 int currentLineIndentation = 0; // index at which `currentLineText` contains the first non-whitespace char 0121 int currentLineSpacesCount = 0; // count of spaces found in `currentLineText` indentation 0122 int currentLineTabsCount = 0; // count of tabs found in `currentLineText` indentation 0123 for (int j = 0, lenJ = currentLineLength; j < lenJ; j++) { 0124 const auto charCode = currentLineText.at(j); 0125 0126 if (charCode == QLatin1Char('\t')) { 0127 currentLineTabsCount++; 0128 } else if (charCode == QLatin1Char(' ')) { 0129 currentLineSpacesCount++; 0130 } else { 0131 // Hit non whitespace character on this line 0132 currentLineHasContent = true; 0133 currentLineIndentation = j; 0134 break; 0135 } 0136 } 0137 0138 // Ignore empty or only whitespace lines 0139 if (!currentLineHasContent) { 0140 continue; 0141 } 0142 0143 if (currentLineTabsCount > 0) { 0144 linesIndentedWithTabsCount++; 0145 } else if (currentLineSpacesCount > 1) { 0146 linesIndentedWithSpacesCount++; 0147 } 0148 0149 tmp = spacesDiff(previousLineText, previousLineIndentation, currentLineText, currentLineIndentation); 0150 0151 if (tmp.looksLikeAlignment) { 0152 // if defaultInsertSpaces === true && the spaces count == tabSize, we may want to count it as valid indentation 0153 // 0154 // - item1 0155 // - item2 0156 // 0157 // otherwise skip this line entirely 0158 // 0159 // const a = 1, 0160 // b = 2; 0161 0162 if (!(defaultInsertSpaces && defaultTabSize == tmp.spacesDiff)) { 0163 continue; 0164 } 0165 } 0166 0167 const int currentSpacesDiff = tmp.spacesDiff; 0168 if (currentSpacesDiff <= MAX_ALLOWED_TAB_SIZE_GUESS) { 0169 spacesDiffCount[currentSpacesDiff]++; 0170 } 0171 0172 previousLineText = currentLineText; 0173 previousLineIndentation = currentLineIndentation; 0174 } 0175 0176 bool insertSpaces = defaultInsertSpaces; 0177 if (linesIndentedWithTabsCount != linesIndentedWithSpacesCount) { 0178 insertSpaces = (linesIndentedWithTabsCount < linesIndentedWithSpacesCount); 0179 } 0180 0181 int tabSize = defaultTabSize; 0182 0183 // Guess tabSize only if inserting spaces... 0184 if (insertSpaces) { 0185 int tabSizeScore = 0; 0186 for (int i = 0; i < 7; ++i) { 0187 int possibleTabSize = ALLOWED_TAB_SIZE_GUESSES[i]; 0188 const int possibleTabSizeScore = spacesDiffCount[possibleTabSize]; 0189 if (possibleTabSizeScore > tabSizeScore) { 0190 tabSizeScore = possibleTabSizeScore; 0191 tabSize = possibleTabSize; 0192 } 0193 } 0194 0195 // Let a tabSize of 2 win even if it is not the maximum 0196 // (only in case 4 was guessed) 0197 if (tabSize == 4 && spacesDiffCount[4] > 0 && spacesDiffCount[2] > 0 && spacesDiffCount[2] >= spacesDiffCount[4] / 2) { 0198 tabSize = 2; 0199 } 0200 0201 // If no indent detected, check if the file is 1 space indented 0202 if (tabSizeScore == 0) { 0203 const auto it = std::max_element(spacesDiffCount, spacesDiffCount + 9); 0204 const auto maxIdx = std::distance(spacesDiffCount, it); 0205 if (maxIdx == 1) { 0206 tabSize = 1; 0207 } 0208 } 0209 } 0210 0211 return {tabSize, insertSpaces}; 0212 }