File indexing completed on 2024-04-21 03:57:48

0001 /*
0002     SPDX-FileCopyrightText: 2022 Waqar Ahmed <waqar.17a@gmail.com>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 #include "kateindentdetecter.h"
0007 
0008 #include "katedocument.h"
0009 
0010 KateIndentDetecter::KateIndentDetecter(KTextEditor::DocumentPrivate *doc)
0011     : m_doc(doc)
0012 {
0013 }
0014 
// Outcome of comparing the indentation of two lines (see spacesDiff()).
struct SpacesDiffResult {
    // Number of spaces by which the two indentations differ; 0 when no usable difference was found.
    int spacesDiff{0};
    // True when the difference looks like manual alignment rather than an indentation step.
    bool looksLikeAlignment{false};
};
0019 
0020 static SpacesDiffResult spacesDiff(const QString &a, int aLength, const QString &b, int bLength)
0021 {
0022     SpacesDiffResult result;
0023     result.spacesDiff = 0;
0024     result.looksLikeAlignment = false;
0025 
0026     // This can go both ways (e.g.):
0027     //  - a: "\t"
0028     //  - b: "\t    "
0029     //  => This should count 1 tab and 4 spaces
0030 
0031     int i = 0;
0032 
0033     for (i = 0; i < aLength && i < bLength; i++) {
0034         const auto aCharCode = a.at(i);
0035         const auto bCharCode = b.at(i);
0036 
0037         if (aCharCode != bCharCode) {
0038             break;
0039         }
0040     }
0041 
0042     int aSpacesCnt = 0;
0043     int aTabsCount = 0;
0044     for (int j = i; j < aLength; j++) {
0045         const auto aCharCode = a.at(j);
0046         if (aCharCode == QLatin1Char(' ')) {
0047             aSpacesCnt++;
0048         } else {
0049             aTabsCount++;
0050         }
0051     }
0052 
0053     int bSpacesCnt = 0;
0054     int bTabsCount = 0;
0055     for (int j = i; j < bLength; j++) {
0056         const auto bCharCode = b.at(j);
0057         if (bCharCode == QLatin1Char(' ')) {
0058             bSpacesCnt++;
0059         } else {
0060             bTabsCount++;
0061         }
0062     }
0063 
0064     if (aSpacesCnt > 0 && aTabsCount > 0) {
0065         return result;
0066     }
0067     if (bSpacesCnt > 0 && bTabsCount > 0) {
0068         return result;
0069     }
0070 
0071     const auto tabsDiff = std::abs(aTabsCount - bTabsCount);
0072     const auto spacesDiff = std::abs(aSpacesCnt - bSpacesCnt);
0073 
0074     if (tabsDiff == 0) {
0075         // check if the indentation difference might be caused by alignment reasons
0076         // sometime folks like to align their code, but this should not be used as a hint
0077         result.spacesDiff = spacesDiff;
0078 
0079         if (spacesDiff > 0 && 0 <= bSpacesCnt - 1 && bSpacesCnt - 1 < a.length() && bSpacesCnt < b.length()) {
0080             if (b.at(bSpacesCnt) != QLatin1Char(' ') && a.at(bSpacesCnt - 1) == QLatin1Char(' ')) {
0081                 if (a.at(a.length() - 1) == QLatin1Char(',')) {
0082                     // This looks like an alignment desire: e.g.
0083                     // const a = b + c,
0084                     //       d = b - c;
0085                     result.looksLikeAlignment = true;
0086                 }
0087             }
0088         }
0089         return result;
0090     }
0091     if (spacesDiff % tabsDiff == 0) {
0092         result.spacesDiff = spacesDiff / tabsDiff;
0093         return result;
0094     }
0095     return result;
0096 }
0097 
0098 KateIndentDetecter::Result KateIndentDetecter::detect(int defaultTabSize, bool defaultInsertSpaces)
0099 {
0100     // Look at most at the first 10k lines
0101     const int linesCount = std::min(m_doc->lines(), 10000);
0102 
0103     int linesIndentedWithTabsCount = 0; // number of lines that contain at least one tab in indentation
0104     int linesIndentedWithSpacesCount = 0; // number of lines that contain only spaces in indentation
0105 
0106     QString previousLineText; // content of latest line that contained non-whitespace chars
0107     int previousLineIndentation = 0; // index at which latest line contained the first non-whitespace char
0108 
0109     constexpr int ALLOWED_TAB_SIZE_GUESSES[7] = {2, 4, 6, 8, 3, 5, 7}; // prefer even guesses for `tabSize`, limit to [2, 8].
0110     constexpr int MAX_ALLOWED_TAB_SIZE_GUESS = 8; // max(ALLOWED_TAB_SIZE_GUESSES) = 8
0111 
0112     int spacesDiffCount[] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; // `tabSize` scores
0113     SpacesDiffResult tmp;
0114 
0115     for (int lineNumber = 0; lineNumber < linesCount; lineNumber++) {
0116         const QString currentLineText = m_doc->line(lineNumber);
0117         const int currentLineLength = currentLineText.length();
0118 
0119         bool currentLineHasContent = false; // does `currentLineText` contain non-whitespace chars
0120         int currentLineIndentation = 0; // index at which `currentLineText` contains the first non-whitespace char
0121         int currentLineSpacesCount = 0; // count of spaces found in `currentLineText` indentation
0122         int currentLineTabsCount = 0; // count of tabs found in `currentLineText` indentation
0123         for (int j = 0, lenJ = currentLineLength; j < lenJ; j++) {
0124             const auto charCode = currentLineText.at(j);
0125 
0126             if (charCode == QLatin1Char('\t')) {
0127                 currentLineTabsCount++;
0128             } else if (charCode == QLatin1Char(' ')) {
0129                 currentLineSpacesCount++;
0130             } else {
0131                 // Hit non whitespace character on this line
0132                 currentLineHasContent = true;
0133                 currentLineIndentation = j;
0134                 break;
0135             }
0136         }
0137 
0138         // Ignore empty or only whitespace lines
0139         if (!currentLineHasContent) {
0140             continue;
0141         }
0142 
0143         if (currentLineTabsCount > 0) {
0144             linesIndentedWithTabsCount++;
0145         } else if (currentLineSpacesCount > 1) {
0146             linesIndentedWithSpacesCount++;
0147         }
0148 
0149         tmp = spacesDiff(previousLineText, previousLineIndentation, currentLineText, currentLineIndentation);
0150 
0151         if (tmp.looksLikeAlignment) {
0152             // if defaultInsertSpaces === true && the spaces count == tabSize, we may want to count it as valid indentation
0153             //
0154             // - item1
0155             //   - item2
0156             //
0157             // otherwise skip this line entirely
0158             //
0159             // const a = 1,
0160             //       b = 2;
0161 
0162             if (!(defaultInsertSpaces && defaultTabSize == tmp.spacesDiff)) {
0163                 continue;
0164             }
0165         }
0166 
0167         const int currentSpacesDiff = tmp.spacesDiff;
0168         if (currentSpacesDiff <= MAX_ALLOWED_TAB_SIZE_GUESS) {
0169             spacesDiffCount[currentSpacesDiff]++;
0170         }
0171 
0172         previousLineText = currentLineText;
0173         previousLineIndentation = currentLineIndentation;
0174     }
0175 
0176     bool insertSpaces = defaultInsertSpaces;
0177     if (linesIndentedWithTabsCount != linesIndentedWithSpacesCount) {
0178         insertSpaces = (linesIndentedWithTabsCount < linesIndentedWithSpacesCount);
0179     }
0180 
0181     int tabSize = defaultTabSize;
0182 
0183     // Guess tabSize only if inserting spaces...
0184     if (insertSpaces) {
0185         int tabSizeScore = 0;
0186         for (int i = 0; i < 7; ++i) {
0187             int possibleTabSize = ALLOWED_TAB_SIZE_GUESSES[i];
0188             const int possibleTabSizeScore = spacesDiffCount[possibleTabSize];
0189             if (possibleTabSizeScore > tabSizeScore) {
0190                 tabSizeScore = possibleTabSizeScore;
0191                 tabSize = possibleTabSize;
0192             }
0193         }
0194 
0195         // Let a tabSize of 2 win even if it is not the maximum
0196         // (only in case 4 was guessed)
0197         if (tabSize == 4 && spacesDiffCount[4] > 0 && spacesDiffCount[2] > 0 && spacesDiffCount[2] >= spacesDiffCount[4] / 2) {
0198             tabSize = 2;
0199         }
0200 
0201         // If no indent detected, check if the file is 1 space indented
0202         if (tabSizeScore == 0) {
0203             const auto it = std::max_element(spacesDiffCount, spacesDiffCount + 9);
0204             const auto maxIdx = std::distance(spacesDiffCount, it);
0205             if (maxIdx == 1) {
0206                 tabSize = 1;
0207             }
0208         }
0209     }
0210 
0211     return {tabSize, insertSpaces};
0212 }