File indexing completed on 2024-05-12 08:32:58
0001 /* 0002 SPDX-FileCopyrightText: 2005 Piotr Szymanski <niedakh@gmail.com> 0003 0004 SPDX-License-Identifier: GPL-2.0-or-later 0005 */ 0006 0007 #ifndef _OKULAR_TEXTPAGE_H_ 0008 #define _OKULAR_TEXTPAGE_H_ 0009 0010 #include <QList> 0011 #include <QString> 0012 0013 #include "global.h" 0014 #include "okularcore_export.h" 0015 0016 class QTransform; 0017 0018 namespace Okular 0019 { 0020 class NormalizedPoint; 0021 class NormalizedRect; 0022 class Page; 0023 class PagePrivate; 0024 class TextPagePrivate; 0025 class TextSelection; 0026 class RegularAreaRect; 0027 0028 /*! @class TextEntity 0029 * @short Represents a piece of text on a TextPage, containing its textual representation and its bounding box. 0030 * 0031 * To enable searching and text selection, a generator can give information about the textual 0032 * content of a Page using a TextPage. 0033 * A TextPage is created using TextEntity objects. 0034 * A TextEntity can represent a single character/glyph, a word, a line, or even the whole page. 0035 * 0036 * Ideally, every single glyph is represented by its own TextEntity. 0037 * If the textual representation of a graphical glyph contains more than one character, 0038 * the TextEntity must contain the whole string which represents the glyph. 0039 * 0040 * When the Generator has created the TextPage, and it is added to a Page, 0041 * the text entities are reordered to words, lines, and paragraphs, to optimize search and text selection. 0042 * This way, the Generator does not need to care about the logical order of lines or paragraphs. 0043 * 0044 * @par Text Selection/Highlighting 0045 * A TextEntity is the smallest piece of text, which the user can select, or which can be highlighted. 0046 * That is, if the TextEntity represents a word, only the whole word can be selected. 0047 * It would not be possible to select a single glyph of the word, because its bounding box is not known. 0048 * 0049 * @see TextPage, Generator 0050 */ 0051 class OKULARCORE_EXPORT TextEntity 0052 { 0053 public: 0054 typedef QList<TextEntity *> List; 0055 0056 /** 0057 * Creates a new text entity with the given @p text and the 0058 * given @p area. 0059 */ 0060 TextEntity(const QString &text, NormalizedRect *area); 0061 0062 /** 0063 * Destroys the text entity. 0064 */ 0065 ~TextEntity(); 0066 0067 /** 0068 * Returns the text of the text entity. 0069 */ 0070 QString text() const; 0071 0072 /** 0073 * Returns the bounding area of the text entity. 0074 */ 0075 NormalizedRect *area() const; 0076 0077 /** 0078 * Returns the transformed area of the text entity. 0079 */ 0080 NormalizedRect transformedArea(const QTransform &matrix) const; 0081 0082 private: 0083 QString m_text; 0084 NormalizedRect *m_area; 0085 0086 class Private; 0087 const Private *d; 0088 0089 Q_DISABLE_COPY(TextEntity) 0090 }; 0091 0092 /** 0093 * @short Represents the textual information of a Page. Makes search and text selection possible. 0094 * 0095 * A Generator with text support should add a TextPage to every Page. 0096 * For every piece of text, a TextEntity is added, holding the string representation and the bounding box. 0097 * 0098 * Ideally, every TextEntity describes only one glyph. 0099 * A "glyph" is one character in the graphical representation, but the textual representation may consist of multiple characters (like diacritic modifiers). 0100 * 0101 * When the TextPage is added to the Page, the TextEntitys are restructured to optimize text selection. 0102 * 0103 * @see TextEntity 0104 */ 0105 class OKULARCORE_EXPORT TextPage 0106 { 0107 /// @cond PRIVATE 0108 friend class Page; 0109 friend class PagePrivate; 0110 /// @endcond 0111 0112 public: 0113 /** 0114 * Defines the behaviour of adding characters to text() result 0115 * @since 0.10 (KDE 4.4) 0116 */ 0117 enum TextAreaInclusionBehaviour { 0118 AnyPixelTextAreaInclusionBehaviour, ///< A character is included into text() result if any pixel of his bounding box is in the given area 0119 CentralPixelTextAreaInclusionBehaviour ///< A character is included into text() result if the central pixel of his bounding box is in the given area 0120 }; 0121 0122 /** 0123 * Creates a new text page. 0124 */ 0125 TextPage(); 0126 0127 /** 0128 * Creates a new text page with the given @p words. 0129 */ 0130 explicit TextPage(const TextEntity::List &words); 0131 0132 /** 0133 * Destroys the text page. 0134 */ 0135 ~TextPage(); 0136 0137 /** 0138 * Appends the given @p text with the given @p area as new 0139 * @ref TextEntity to the page. 0140 */ 0141 void append(const QString &text, NormalizedRect *area); 0142 0143 /** 0144 * Returns the bounding rect of the text which matches the following criteria 0145 * or 0 if the search is not successful. 0146 * 0147 * @param searchID An unique id for this search. 0148 * @param query The search text. 0149 * @param direction The direction of the search (@ref SearchDirection) 0150 * @param caseSensitivity If Qt::CaseSensitive, the search is case sensitive; otherwise 0151 * the search is case insensitive. 0152 * @param area If null the search starts at the beginning of the page, otherwise 0153 * right/below the coordinates of the given rect. 0154 */ 0155 RegularAreaRect *findText(int searchID, const QString &query, SearchDirection direction, Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *area); 0156 0157 /** 0158 * Text extraction function. Looks for text in the given @p area. 0159 * 0160 * @return 0161 * - If @p area points to a valid null area, a null string. 0162 * - If @p area is nullptr, the whole page text as a single string. 0163 * - Otherwise, the text which is included by @p area, as a single string. 0164 * Uses AnyPixelTextAreaInclusionBehaviour 0165 */ 0166 QString text(const RegularAreaRect *area = nullptr) const; 0167 0168 /** 0169 * Text extraction function. Looks for text in the given @p area. 0170 * 0171 * @return 0172 * - If @p area points to a valid null area, a null string. 0173 * - If @p area is nullptr, the whole page text as a single string. 0174 * - Otherwise, the text which is included by @p area, as a single string. 0175 * @since 0.10 (KDE 4.4) 0176 */ 0177 QString text(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const; 0178 0179 /** 0180 * Text entity extraction function. Similar to text() but returns 0181 * the words including their bounding rectangles. Note that 0182 * ownership of the contents of the returned list belongs to the 0183 * caller. 0184 * @since 0.14 (KDE 4.8) 0185 */ 0186 TextEntity::List words(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const; 0187 0188 /** 0189 * Returns the area and text of the word at the given point 0190 * Note that ownership of the returned area belongs to the caller. 0191 * @since 0.15 (KDE 4.9) 0192 */ 0193 RegularAreaRect *wordAt(const NormalizedPoint &p, QString *word = nullptr) const; 0194 0195 /** 0196 * Returns the rectangular area of the given @p selection. 0197 */ 0198 RegularAreaRect *textArea(TextSelection *selection) const; 0199 0200 private: 0201 TextPagePrivate *const d; 0202 0203 Q_DISABLE_COPY(TextPage) 0204 }; 0205 0206 } 0207 0208 #endif