// File indexing completed on 2024-04-28 04:32:47 (source-browser banner, not part of the original header)
0001 /* 0002 SPDX-FileCopyrightText: 2005 Piotr Szymanski <niedakh@gmail.com> 0003 0004 SPDX-License-Identifier: GPL-2.0-or-later 0005 */ 0006 0007 #ifndef _OKULAR_TEXTPAGE_H_ 0008 #define _OKULAR_TEXTPAGE_H_ 0009 0010 #include <QList> 0011 #include <QString> 0012 0013 #include "area.h" 0014 #include "global.h" 0015 #include "okularcore_export.h" 0016 0017 class QTransform; 0018 0019 namespace Okular 0020 { 0021 class NormalizedPoint; 0022 class NormalizedRect; 0023 class Page; 0024 class PagePrivate; 0025 class TextPagePrivate; 0026 class TextSelection; 0027 class RegularAreaRect; 0028 0029 /*! @class TextEntity 0030 * @short Represents a piece of text on a TextPage, containing its textual representation and its bounding box. 0031 * 0032 * To enable searching and text selection, a generator can give information about the textual 0033 * content of a Page using a TextPage. 0034 * A TextPage is created using TextEntity objects. 0035 * A TextEntity can represent a single character/glyph, a word, a line, or even the whole page. 0036 * 0037 * Ideally, every single glyph is represented by its own TextEntity. 0038 * If the textual representation of a graphical glyph contains more than one character, 0039 * the TextEntity must contain the whole string which represents the glyph. 0040 * 0041 * When the Generator has created the TextPage, and it is added to a Page, 0042 * the text entities are reordered to words, lines, and paragraphs, to optimize search and text selection. 0043 * This way, the Generator does not need to care about the logical order of lines or paragraphs. 0044 * 0045 * @par Text Selection/Highlighting 0046 * A TextEntity is the smallest piece of text, which the user can select, or which can be highlighted. 0047 * That is, if the TextEntity represents a word, only the whole word can be selected. 0048 * It would not be possible to select a single glyph of the word, because its bounding box is not known. 
0049 * 0050 * @see TextPage, Generator 0051 */ 0052 class OKULARCORE_EXPORT TextEntity 0053 { 0054 public: 0055 typedef QList<TextEntity> List; 0056 0057 /** 0058 * Creates a new text entity with the given @p text and the 0059 * given @p area. 0060 */ 0061 TextEntity(const QString &text, const NormalizedRect &area); 0062 0063 /** 0064 * Destroys the text entity. 0065 */ 0066 ~TextEntity(); 0067 0068 /** 0069 * Returns the text of the text entity. 0070 */ 0071 QString text() const; 0072 0073 /** 0074 * Returns the bounding area of the text entity. 0075 */ 0076 NormalizedRect area() const; 0077 0078 /** 0079 * Returns the transformed area of the text entity. 0080 */ 0081 NormalizedRect transformedArea(const QTransform &matrix) const; 0082 0083 private: 0084 QString m_text; 0085 NormalizedRect m_area; 0086 }; 0087 0088 /** 0089 * @short Represents the textual information of a Page. Makes search and text selection possible. 0090 * 0091 * A Generator with text support should add a TextPage to every Page. 0092 * For every piece of text, a TextEntity is added, holding the string representation and the bounding box. 0093 * 0094 * Ideally, every TextEntity describes only one glyph. 0095 * A "glyph" is one character in the graphical representation, but the textual representation may consist of multiple characters (like diacritic modifiers). 0096 * 0097 * When the TextPage is added to the Page, the TextEntitys are restructured to optimize text selection. 
0098 * 0099 * @see TextEntity 0100 */ 0101 class OKULARCORE_EXPORT TextPage 0102 { 0103 /// @cond PRIVATE 0104 friend class Page; 0105 friend class PagePrivate; 0106 /// @endcond 0107 0108 public: 0109 /** 0110 * Defines the behaviour of adding characters to text() result 0111 * @since 0.10 (KDE 4.4) 0112 */ 0113 enum TextAreaInclusionBehaviour { 0114 AnyPixelTextAreaInclusionBehaviour, ///< A character is included into text() result if any pixel of his bounding box is in the given area 0115 CentralPixelTextAreaInclusionBehaviour ///< A character is included into text() result if the central pixel of his bounding box is in the given area 0116 }; 0117 0118 /** 0119 * Creates a new text page. 0120 */ 0121 TextPage(); 0122 0123 /** 0124 * Creates a new text page with the given @p words. 0125 */ 0126 explicit TextPage(const TextEntity::List &words); 0127 0128 /** 0129 * Destroys the text page. 0130 */ 0131 ~TextPage(); 0132 0133 /** 0134 * Appends the given @p text with the given @p area as new 0135 * @ref TextEntity to the page. 0136 */ 0137 void append(const QString &text, const NormalizedRect &area); 0138 0139 /** 0140 * Returns the bounding rect of the text which matches the following criteria 0141 * or 0 if the search is not successful. 0142 * 0143 * @param searchID An unique id for this search. 0144 * @param query The search text. 0145 * @param direction The direction of the search (@ref SearchDirection) 0146 * @param caseSensitivity If Qt::CaseSensitive, the search is case sensitive; otherwise 0147 * the search is case insensitive. 0148 * @param area If null the search starts at the beginning of the page, otherwise 0149 * right/below the coordinates of the given rect. 0150 */ 0151 RegularAreaRect *findText(int searchID, const QString &query, SearchDirection direction, Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *area); 0152 0153 /** 0154 * Text extraction function. Looks for text in the given @p area. 
0155 * 0156 * @return 0157 * - If @p area points to a valid null area, a null string. 0158 * - If @p area is nullptr, the whole page text as a single string. 0159 * - Otherwise, the text which is included by @p area, as a single string. 0160 * Uses AnyPixelTextAreaInclusionBehaviour 0161 */ 0162 QString text(const RegularAreaRect *area = nullptr) const; 0163 0164 /** 0165 * Text extraction function. Looks for text in the given @p area. 0166 * 0167 * @return 0168 * - If @p area points to a valid null area, a null string. 0169 * - If @p area is nullptr, the whole page text as a single string. 0170 * - Otherwise, the text which is included by @p area, as a single string. 0171 * @since 0.10 (KDE 4.4) 0172 */ 0173 QString text(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const; 0174 0175 /** 0176 * Text entity extraction function. Similar to text() but returns 0177 * the words including their bounding rectangles. Note that 0178 * ownership of the contents of the returned list belongs to the 0179 * caller. 0180 * @since 0.14 (KDE 4.8) 0181 */ 0182 TextEntity::List words(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const; 0183 0184 /** 0185 * Returns the area and text of the word at the given point 0186 * Note that ownership of the returned area belongs to the caller. 0187 * @since 0.15 (KDE 4.9) 0188 */ 0189 RegularAreaRect *wordAt(const NormalizedPoint &p, QString *word = nullptr) const; 0190 0191 /** 0192 * Returns the rectangular area of the given @p selection. 0193 */ 0194 RegularAreaRect *textArea(TextSelection *selection) const; 0195 0196 private: 0197 TextPagePrivate *const d; 0198 0199 Q_DISABLE_COPY(TextPage) 0200 }; 0201 0202 } 0203 0204 #endif