File indexing completed on 2024-12-22 04:09:06

0001 /*
0002  *  SPDX-FileCopyrightText: 2022 Wolthera van Hövell tot Westerflier <griffinvalley@gmail.com>
0003  *
0004  *  SPDX-License-Identifier: GPL-2.0-or-later
0005  */
0006 #ifndef KOCSSTEXTUTILS_H
0007 #define KOCSSTEXTUTILS_H
0008 
0009 #include <KoSvgText.h>
0010 #include <QDebug>
0011 #include <QLocale>
0012 #include <QString>
0013 
0014 #include "kritaflake_export.h"
0015 /**
0016  * @brief The KoCssTextUtils class
0017  *
0018  * This class keeps a number of utility functions related to CSS Text,
0019  * in particular CSS-Text-3 and CSS-Text-4.
0020  */
0021 class KRITAFLAKE_EXPORT KoCssTextUtils
0022 {
0023 public:
0024     /**
0025      * @brief transformTextToUpperCase
0026      * Convenience function that creates a QLocale and uses its 'toUpper'
0027      * function. Note: When building Qt without ICU, this uses platform
0028      * dependent functions.
0029      *
0030      * @param text the text to transform.
0031      * @param langCode the language code in BCP format; it is converted to QLocale's format.
0032      * @param positions non-const vector of int pairs; meaning not documented here (TODO confirm).
0033      * @return the transformed string.
0034      */
0035     static QString transformTextToUpperCase(const QString &text, const QString &langCode, QVector<QPair<int, int>> &positions);
0036 
0037     /**
0038      * @brief transformTextToLowerCase
0039      * Convenience function that creates a QLocale and uses its 'toLower'
0040      * function. Note: When building Qt without ICU, this uses platform
0041      * dependent functions.
0042      *
0043      * @param text the text to transform.
0044      * @param langCode the language code in BCP format; it is converted to QLocale's format.
0045      * @param positions non-const vector of int pairs; meaning not documented here (TODO confirm).
0046      * @return the transformed string.
0047      */
0048     static QString transformTextToLowerCase(const QString &text, const QString &langCode, QVector<QPair<int, int>> &positions);
0049 
0050     /**
0051      * @brief transformTextCapitalize
0052      * This function splits the text into graphemes, and then uses
0053      * QLocale::toUpper for each letter following a whitespace character or CSS
0054      * word-separator. It has a small codepath for transforming the Dutch IJ
0055      * correctly, as this is more readable. Note: When building Qt without ICU,
0056      * this uses platform dependent functions.
0057      *
0058      * @param text the text to transform.
0059      * @param langCode the language code in BCP format; it is converted to QLocale's format.
0060      * @param positions non-const vector of int pairs; meaning not documented here (TODO confirm).
0061      * @return the transformed string.
0062      */
0063     static QString transformTextCapitalize(const QString &text, QString langCode, QVector<QPair<int, int>> &positions);
0064 
0065     /**
0066      * @brief transformTextFullWidth
0067      * This function will transform 'narrow' or 'halfwidth' characters to their
0068      * normal counterparts, and will transform ASCII characters to their
0069      * 'fullwidth'/'ideographic' counterparts.
0070      *
0071      * @param text the text to transform.
0072      * @return the transformed text.
0073      */
0074     static QString transformTextFullWidth(const QString &text);
0075     /**
0076      * @brief transformTextFullSizeKana
0077      * This function will take 'small' Kana (Japanese phonetic script) and
0078      * transform it to their 'full-size' equivalents, following the list in the
0079      * CSS-Text-3 spec.
0080      *
0081      * @param text the text to transform.
0082      * @return the transformed text.
0083      */
0084     static QString transformTextFullSizeKana(const QString &text);
0085 
0086     /**
0087      * @brief collapseSpaces
0088      * Some versions of CSS-Text 'white-space' or 'text-space-collapse' will
0089      * collapse or transform white space characters while others don't. This
0090      * function marks, per character, which ones are to be collapsed.
0091      *
0092      * @param text pointer to the text to check against; this text will be
0093      * transformed if the collapse method requires that.
0094      * @param collapseMethod the white-space/text-space-collapse method.
0095      * @return A vector of booleans the size of the input text that marks
0096      * whether the character should be collapsed.
0097      */
0098     static QVector<bool> collapseSpaces(QString *text, KoSvgText::TextSpaceCollapse collapseMethod);
0099 
0100     /**
0101      * @brief collapseLastSpace
0102      * Some versions of CSS-Text 'white-space' or 'text-space-collapse' will
0103      * collapse the last spaces while others don't. This function returns
0104      * whether that's the case.
0105      *
0106      * @param c the character to check.
0107      * @param collapseMethod the text-space collapse type.
0108      * @return whether the character should collapse if it's the last space in a
0109      * line.
0110      */
0111     static bool collapseLastSpace(QChar c, KoSvgText::TextSpaceCollapse collapseMethod);
0112 
0113     /**
0114      * @brief hangLastSpace
0115      * Some versions of CSS-Text 'white-space' or 'text-space-collapse' will
0116      * hang the final space depending on the situation.
0117      * @param c the character in question.
0118      * @param collapseMethod the collapse method.
0119      * @param wrapMethod the wrap method.
0120      * @param force whether the hang is forced; non-const ref, presumably an output (TODO confirm).
0121      * @param nextCharIsHardBreak whether the next char is a line break.
0122      * @return presumably whether the space should hang -- TODO confirm in the implementation.
0123      */
0124     static bool hangLastSpace(const QChar c,
0125                               KoSvgText::TextSpaceCollapse collapseMethod,
0126                               KoSvgText::TextWrap wrapMethod,
0127                               bool &force, bool nextCharIsHardBreak);
0128 
0129     /**
0130      * @brief characterCanHang
0131      * The function returns whether the character qualifies for
0132      * 'hanging-punctuation', using the given hang-type.
0133      *
0134      * @param c the character to check.
0135      * @param hangType how to hang.
0136      * @return whether the character can hang.
0137      */
0138     static bool characterCanHang(QChar c, KoSvgText::HangingPunctuations hangType);
0139 
0140     /**
0141      * @brief IsCssWordSeparator
0142      * CSS has a number of characters it considers word-separators, which are
0143      * used in justification and for word-spacing.
0144      *
0145      * @param grapheme a grapheme to check. Using graphemes here, because some
0146      * of the word-separators are not in the Unicode basic plane.
0147      * @return true if it is a word-separator.
0148      */
0149     static bool IsCssWordSeparator(QString grapheme);
0150 
0151     /**
0152      * @brief textToUnicodeGraphemeClusters
0153      * In letters like Å, the amount of Unicode codepoints can be 1, but it can
0154      * also be 2, one for 'A', and one for 'Combining Mark Ring Above'. In some
0155      * letters used by Vietnamese, such as ỗ there can be even 3. Such codepoint
0156      * sequences are considered 'grapheme-clusters'. For editing text, matching
0157      * fonts or capitalizing the first letter, it's wisest to do so on the
0158      * grapheme clusters instead of the individual codepoints.
0159      *
0160      * @param text the text to break.
0161      * @param langCode the language code of the text, BCP style.
0162      * @return a QStringList of the graphemes as separate strings.
0163      */
0164     static QStringList textToUnicodeGraphemeClusters(const QString &text, const QString &langCode);
0165 
0166     /**
0167      * @brief justificationOpportunities
0168      * Mark justification opportunities in the text. Opportunities are between
0169      * characters, so this returns a pair of before and after.
0170      * As of currently, this only implements the bare minimum for CSS-Text-3
0171      * auto justification.
0172      * @param text the text to check against.
0173      * @param langCode language, used for the grapheme breaking.
0174      * @return a list of (before, after) boolean pairs for whether the current codepoint has a justification opportunity.
0175      */
0176     static QVector<QPair<bool, bool>> justificationOpportunities(QString text, QString langCode);
0177 
0178     /**
0179      * @brief getBidiOpening
0180      * Get the bidi opening string associated with the given CSS unicode-bidi value and direction
0181      * https://www.w3.org/TR/css-writing-modes-3/#unicode-bidi
0182      * @param ltr -- whether the direction is left-to-right
0183      * @param bidi -- the unicode-bidi value.
0184      * @return string with bidi opening marks.
0185      */
0186     static QString getBidiOpening(bool ltr, KoSvgText::UnicodeBidi bidi);
0187 
0188     /**
0189      * @brief getBidiClosing
0190      * Returns the bidi closing string associated with the given CSS unicode-bidi value.
0191      * @param bidi -- the unicode-bidi value
0192      * @return string with bidi closing marks.
0193      */
0194     static QString getBidiClosing(KoSvgText::UnicodeBidi bidi);
0195 
0196     /**
0197      * @brief removeText
0198      * Special removal of text that takes a text, start and length and will adjust these values
0199      * so that...
0200      * - Whole code points are always deleted, avoiding
0201      *   dangling surrogates.
0202      * - Graphemes don't end with Zero-width-joiners, as that can lead
0203      *   to the grapheme merging with the next.
0204      * - Variation selectors are deleted along with their base.
0205      * - Regional sequences are deleted in pairs.
0206      * @param text text to remove text from (modified through the reference).
0207      * @param start the start index, will be modified.
0208      * @param length the requested length of the removal (passed by value).
0209      */
0210     static void removeText(QString &text, int &start, int length);
0211 };
0212 
0213 #endif // KOCSSTEXTUTILS_H