// File indexing completed on 2024-12-22 04:09:06
0001 /* 0002 * SPDX-FileCopyrightText: 2022 Wolthera van Hövell tot Westerflier <griffinvalley@gmail.com> 0003 * 0004 * SPDX-License-Identifier: GPL-2.0-or-later 0005 */ 0006 #ifndef KOCSSTEXTUTILS_H 0007 #define KOCSSTEXTUTILS_H 0008 0009 #include <KoSvgText.h> 0010 #include <QDebug> 0011 #include <QLocale> 0012 #include <QString> 0013 0014 #include "kritaflake_export.h" 0015 /** 0016 * @brief The KoCssTextUtils class 0017 * 0018 * This class keeps a number of utility functions related to CSS Text, 0019 * in particular CSS-Text-3 and CSS-Text-4. 0020 */ 0021 class KRITAFLAKE_EXPORT KoCssTextUtils 0022 { 0023 public: 0024 /** 0025 * @brief transformTextToUpperCase 0026 * convenience function that creates a QLocale and uses it's 'toUpper' 0027 * function. Note: When building Qt without ICU, this uses platform 0028 * dependant functions. 0029 * 0030 * @param text the text to transform. 0031 * @param langCode the language code in BCP format, it gets transformed to 0032 * qLocale's format. 0033 * @return the transformed string. 0034 */ 0035 static QString transformTextToUpperCase(const QString &text, const QString &langCode, QVector<QPair<int, int>> &positions); 0036 0037 /** 0038 * @brief transformTextToUpperCase 0039 * convenience function that creates a QLocale and uses it's 'toLower' 0040 * function. Note: When building Qt without ICU, this uses platform 0041 * dependant functions. 0042 * 0043 * @param text the text to transform. 0044 * @param langCode the language code in BCP format, it gets transformed to 0045 * qLocale's format. 0046 * @return the transformed string. 0047 */ 0048 static QString transformTextToLowerCase(const QString &text, const QString &langCode, QVector<QPair<int, int>> &positions); 0049 0050 /** 0051 * @brief transformTextToUpperCase 0052 * This function splits the text into graphemes, and then uses 0053 * QLocale::toUpper for each letter following a whitespace character or CSS 0054 * Wordseparator. 
It has a small codepath for transforming the Dutch IJ 0055 * correctly, as this is more readable. Note: When building Qt without ICU, 0056 * this uses platform dependant functions. 0057 * 0058 * @param text the text to transform. 0059 * @param langCode the language code in BCP format, it gets transformed to 0060 * qLocale's format. 0061 * @return the transformed string. 0062 */ 0063 static QString transformTextCapitalize(const QString &text, QString langCode, QVector<QPair<int, int>> &positions); 0064 0065 /** 0066 * @brief transformTextFullWidth 0067 * This function will transform 'narrow' or 'halfwidth' characters to their 0068 * normal counterparts, and will transform ascii characters to their 0069 * 'fullwidth'/'ideographic' counterparts. 0070 * 0071 * @param text the text to transform. 0072 * @return the transformed text. 0073 */ 0074 static QString transformTextFullWidth(const QString &text); 0075 /** 0076 * @brief transformTextFullSizeKana 0077 * This function will take 'small' Kana (Japanese phonetic script) and 0078 * transform it to their 'full-size' equivelants, following the list in the 0079 * CSS-Text-3 spec. 0080 * 0081 * @param text the text to transform. 0082 * @return the transformed text. 0083 */ 0084 static QString transformTextFullSizeKana(const QString &text); 0085 0086 /** 0087 * @brief collapseSpaces 0088 * Some versions of CSS-Text 'white-space' or 'text-space-collapse' will 0089 * collapse or transform white space characters while others don't. This 0090 * function returns whether that's the case. 0091 * 0092 * @param text the text to check against, this text will be transformed if 0093 * the collapse method requires that. 0094 * @param collapseMethod the white-space/text-space-collapse method. 0095 * @return A vector of booleans the size of the input text that marks 0096 * whether the character should be collapsed. 
0097 */ 0098 static QVector<bool> collapseSpaces(QString *text, KoSvgText::TextSpaceCollapse collapseMethod); 0099 0100 /** 0101 * @brief collapseLastSpace 0102 * Some versions of CSS-Text 'white-space' or 'text-space-collapse' will 0103 * collapse the last spaces while others don't. This function returns 0104 * whether that's the case. 0105 * 0106 * @param c the character to check. 0107 * @param collapseMethod the text-space collapse type. 0108 * @return whether the character should collapse if it's the last space in a 0109 * line. 0110 */ 0111 static bool collapseLastSpace(QChar c, KoSvgText::TextSpaceCollapse collapseMethod); 0112 0113 /** 0114 * @brief hangLastSpace 0115 * Some versions of CSS-Text 'white-space' or 'text-space-collapse' will 0116 * hang the final space depending on the situation. 0117 * @param c the character in question. 0118 * @param collapseMethod the collapse method 0119 * @param wrapMethod the wrap method. 0120 * @param force whether said hang is a forced hang or not. 0121 * @param nextCharIsHardBreak whether the next char is a line break. 0122 * @return 0123 */ 0124 static bool hangLastSpace(const QChar c, 0125 KoSvgText::TextSpaceCollapse collapseMethod, 0126 KoSvgText::TextWrap wrapMethod, 0127 bool &force, bool nextCharIsHardBreak); 0128 0129 /** 0130 * @brief characterCanHang 0131 * The function returns whether the character qualifies for 0132 * 'hanging-punctuation', using the given hang-type. 0133 * 0134 * @param c the character to check. 0135 * @param hangType how to hang. 0136 * @return whether the character can hang. 0137 */ 0138 static bool characterCanHang(QChar c, KoSvgText::HangingPunctuations hangType); 0139 0140 /** 0141 * @brief IsCssWordSeparator 0142 * CSS has a number of characters it considers word-separators, which are 0143 * used in justification and for word-spacing. 0144 * 0145 * @param grapheme a grapheme to check. Using graphemes here, because some 0146 * of the word-separators are not in the unicode basic plane. 
0147 * @return true if it is a word-separator 0148 */ 0149 static bool IsCssWordSeparator(QString grapheme); 0150 0151 /** 0152 * @brief textToUnicodeGraphemes 0153 * In letters like Å, the amount of unicode codpoints can be 1, but it can 0154 * also be 2, one for 'A', and one for 'Combining Mark Ring Above". In some 0155 * letters used by Vietnamese, such as ỗ there can be even 3. Such codepoint 0156 * sequences are considered 'grapheme-clusters'. For editing text, matching 0157 * fonts or capitalizing the first letter, it's wisest to do so on the 0158 * grapheme clusters instead of the individual codepoints. 0159 * 0160 * @param text the text to break. 0161 * @param langCode the language code of the text, BCP style. 0162 * @return a QStringList of the graphemes as seperate strings. 0163 */ 0164 static QStringList textToUnicodeGraphemeClusters(const QString &text, const QString &langCode); 0165 0166 /** 0167 * @brief justificationOpportunities 0168 * mark justification opportunities in the text. Opportunities are between 0169 * characters, so this returns a pair of before and after. 0170 * As of currently, this only implements the bare minimum for CSS-Text-3 0171 * auto justification. 0172 * @param text the text to check against. 0173 * @param langCode language, used for the grapheme breaking. 0174 * @return a list of booleans for whether the current codePoint represents a justificaton opportunity. 0175 */ 0176 static QVector<QPair<bool, bool>> justificationOpportunities(QString text, QString langCode); 0177 0178 /** 0179 * @brief getBidiOpening 0180 * Get the bidi opening string associated with the given Css unicode-bidi value and direction 0181 * https://www.w3.org/TR/css-writing-modes-3/#unicode-bidi 0182 * @param ltr -- whether the direction is left-to-right 0183 * @param bidi -- the unicodee-bidi value. 0184 * @return string with bidi opening marks. 
0185 */ 0186 static QString getBidiOpening(bool ltr, KoSvgText::UnicodeBidi bidi); 0187 0188 /** 0189 * @brief getBidiClosing 0190 * Returns the bidi closing string associated with the given Css unicode-bidi value. 0191 * @param bidi -- the unicode-bidi value 0192 * @return string with bidi closing marks. 0193 */ 0194 static QString getBidiClosing(KoSvgText::UnicodeBidi bidi); 0195 0196 /** 0197 * @brief removeText 0198 * Special removal of text that takes a text, start and length and will modify these values 0199 * so that... 0200 * - Whole code points are deleted at any time, avoiding 0201 * no dangling surrogates. 0202 * - Graphemes don't end with Zero-width-joiners, as that can lead 0203 * to the grapheme merging with the next. 0204 * - Variation selectors are deleted along their base. 0205 * - regional sequences are deleted in pairs. 0206 * @param text text to remove text from. 0207 * @param start the start index, will be modified. 0208 * @param length the length. 0209 */ 0210 static void removeText(QString &text, int &start, int length); 0211 }; 0212 0213 #endif // KOCSSTEXTUTILS_H