File indexing completed on 2024-04-28 15:30:55

0001 /*
0002     SPDX-FileCopyrightText: 2022 Waqar Ahmed <waqar.17a@gmail.com>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 #include "kateindentdetecter.h"
0007 
0008 KateIndentDetecter::KateIndentDetecter(KTextEditor::DocumentPrivate *doc)
0009     : m_doc(doc)
0010 {
0011 }
0012 
// Outcome of comparing the indentation of two lines (see spacesDiff()).
struct SpacesDiffResult {
    // Indentation difference between the two lines; 0 when none could be derived.
    int spacesDiff{0};
    // True when the difference looks like manual alignment rather than indentation.
    bool looksLikeAlignment{false};
};
0017 
0018 static SpacesDiffResult spacesDiff(const QString &a, int aLength, const QString &b, int bLength)
0019 {
0020     SpacesDiffResult result;
0021     result.spacesDiff = 0;
0022     result.looksLikeAlignment = false;
0023 
0024     // This can go both ways (e.g.):
0025     //  - a: "\t"
0026     //  - b: "\t    "
0027     //  => This should count 1 tab and 4 spaces
0028 
0029     int i = 0;
0030 
0031     for (i = 0; i < aLength && i < bLength; i++) {
0032         const auto aCharCode = a.at(i);
0033         const auto bCharCode = b.at(i);
0034 
0035         if (aCharCode != bCharCode) {
0036             break;
0037         }
0038     }
0039 
0040     int aSpacesCnt = 0;
0041     int aTabsCount = 0;
0042     for (int j = i; j < aLength; j++) {
0043         const auto aCharCode = a.at(j);
0044         if (aCharCode == QLatin1Char(' ')) {
0045             aSpacesCnt++;
0046         } else {
0047             aTabsCount++;
0048         }
0049     }
0050 
0051     int bSpacesCnt = 0;
0052     int bTabsCount = 0;
0053     for (int j = i; j < bLength; j++) {
0054         const auto bCharCode = b.at(j);
0055         if (bCharCode == QLatin1Char(' ')) {
0056             bSpacesCnt++;
0057         } else {
0058             bTabsCount++;
0059         }
0060     }
0061 
0062     if (aSpacesCnt > 0 && aTabsCount > 0) {
0063         return result;
0064     }
0065     if (bSpacesCnt > 0 && bTabsCount > 0) {
0066         return result;
0067     }
0068 
0069     const auto tabsDiff = std::abs(aTabsCount - bTabsCount);
0070     const auto spacesDiff = std::abs(aSpacesCnt - bSpacesCnt);
0071 
0072     if (tabsDiff == 0) {
0073         // check if the indentation difference might be caused by alignment reasons
0074         // sometime folks like to align their code, but this should not be used as a hint
0075         result.spacesDiff = spacesDiff;
0076 
0077         if (spacesDiff > 0 && 0 <= bSpacesCnt - 1 && bSpacesCnt - 1 < a.length() && bSpacesCnt < b.length()) {
0078             if (b.at(bSpacesCnt) != QLatin1Char(' ') && a.at(bSpacesCnt - 1) == QLatin1Char(' ')) {
0079                 if (a.at(a.length() - 1) == QLatin1Char(',')) {
0080                     // This looks like an alignment desire: e.g.
0081                     // const a = b + c,
0082                     //       d = b - c;
0083                     result.looksLikeAlignment = true;
0084                 }
0085             }
0086         }
0087         return result;
0088     }
0089     if (spacesDiff % tabsDiff == 0) {
0090         result.spacesDiff = spacesDiff / tabsDiff;
0091         return result;
0092     }
0093     return result;
0094 }
0095 
0096 KateIndentDetecter::Result KateIndentDetecter::detect(int defaultTabSize, bool defaultInsertSpaces)
0097 {
0098     // Look at most at the first 10k lines
0099     const int linesCount = std::min(m_doc->lines(), 10000);
0100 
0101     int linesIndentedWithTabsCount = 0; // number of lines that contain at least one tab in indentation
0102     int linesIndentedWithSpacesCount = 0; // number of lines that contain only spaces in indentation
0103 
0104     QString previousLineText; // content of latest line that contained non-whitespace chars
0105     int previousLineIndentation = 0; // index at which latest line contained the first non-whitespace char
0106 
0107     constexpr int ALLOWED_TAB_SIZE_GUESSES[7] = {2, 4, 6, 8, 3, 5, 7}; // prefer even guesses for `tabSize`, limit to [2, 8].
0108     constexpr int MAX_ALLOWED_TAB_SIZE_GUESS = 8; // max(ALLOWED_TAB_SIZE_GUESSES) = 8
0109 
0110     int spacesDiffCount[] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; // `tabSize` scores
0111     SpacesDiffResult tmp;
0112 
0113     for (int lineNumber = 0; lineNumber < linesCount; lineNumber++) {
0114         const QString currentLineText = m_doc->line(lineNumber);
0115         const int currentLineLength = currentLineText.length();
0116 
0117         bool currentLineHasContent = false; // does `currentLineText` contain non-whitespace chars
0118         int currentLineIndentation = 0; // index at which `currentLineText` contains the first non-whitespace char
0119         int currentLineSpacesCount = 0; // count of spaces found in `currentLineText` indentation
0120         int currentLineTabsCount = 0; // count of tabs found in `currentLineText` indentation
0121         for (int j = 0, lenJ = currentLineLength; j < lenJ; j++) {
0122             const auto charCode = currentLineText.at(j);
0123 
0124             if (charCode == QLatin1Char('\t')) {
0125                 currentLineTabsCount++;
0126             } else if (charCode == QLatin1Char(' ')) {
0127                 currentLineSpacesCount++;
0128             } else {
0129                 // Hit non whitespace character on this line
0130                 currentLineHasContent = true;
0131                 currentLineIndentation = j;
0132                 break;
0133             }
0134         }
0135 
0136         // Ignore empty or only whitespace lines
0137         if (!currentLineHasContent) {
0138             continue;
0139         }
0140 
0141         if (currentLineTabsCount > 0) {
0142             linesIndentedWithTabsCount++;
0143         } else if (currentLineSpacesCount > 1) {
0144             linesIndentedWithSpacesCount++;
0145         }
0146 
0147         tmp = spacesDiff(previousLineText, previousLineIndentation, currentLineText, currentLineIndentation);
0148 
0149         if (tmp.looksLikeAlignment) {
0150             // if defaultInsertSpaces === true && the spaces count == tabSize, we may want to count it as valid indentation
0151             //
0152             // - item1
0153             //   - item2
0154             //
0155             // otherwise skip this line entirely
0156             //
0157             // const a = 1,
0158             //       b = 2;
0159 
0160             if (!(defaultInsertSpaces && defaultTabSize == tmp.spacesDiff)) {
0161                 continue;
0162             }
0163         }
0164 
0165         const int currentSpacesDiff = tmp.spacesDiff;
0166         if (currentSpacesDiff <= MAX_ALLOWED_TAB_SIZE_GUESS) {
0167             spacesDiffCount[currentSpacesDiff]++;
0168         }
0169 
0170         previousLineText = currentLineText;
0171         previousLineIndentation = currentLineIndentation;
0172     }
0173 
0174     bool insertSpaces = defaultInsertSpaces;
0175     if (linesIndentedWithTabsCount != linesIndentedWithSpacesCount) {
0176         insertSpaces = (linesIndentedWithTabsCount < linesIndentedWithSpacesCount);
0177     }
0178 
0179     int tabSize = defaultTabSize;
0180 
0181     // Guess tabSize only if inserting spaces...
0182     if (insertSpaces) {
0183         int tabSizeScore = 0;
0184         for (int i = 0; i < 7; ++i) {
0185             int possibleTabSize = ALLOWED_TAB_SIZE_GUESSES[i];
0186             const int possibleTabSizeScore = spacesDiffCount[possibleTabSize];
0187             if (possibleTabSizeScore > tabSizeScore) {
0188                 tabSizeScore = possibleTabSizeScore;
0189                 tabSize = possibleTabSize;
0190             }
0191         }
0192 
0193         // Let a tabSize of 2 win even if it is not the maximum
0194         // (only in case 4 was guessed)
0195         if (tabSize == 4 && spacesDiffCount[4] > 0 && spacesDiffCount[2] > 0 && spacesDiffCount[2] >= spacesDiffCount[4] / 2) {
0196             tabSize = 2;
0197         }
0198 
0199         // If no indent detected, check if the file is 1 space indented
0200         if (tabSizeScore == 0) {
0201             const auto it = std::max_element(spacesDiffCount, spacesDiffCount + 9);
0202             const auto maxIdx = std::distance(spacesDiffCount, it);
0203             if (maxIdx == 1) {
0204                 tabSize = 1;
0205             }
0206         }
0207     }
0208 
0209     return {tabSize, insertSpaces};
0210 }