File indexing completed on 2024-05-12 08:32:58

0001 /*
0002     SPDX-FileCopyrightText: 2005 Piotr Szymanski <niedakh@gmail.com>
0003 
0004     SPDX-License-Identifier: GPL-2.0-or-later
0005 */
0006 
0007 #ifndef _OKULAR_TEXTPAGE_H_
0008 #define _OKULAR_TEXTPAGE_H_
0009 
0010 #include <QList>
0011 #include <QString>
0012 
0013 #include "global.h"
0014 #include "okularcore_export.h"
0015 
0016 class QTransform;
0017 
0018 namespace Okular
0019 {
0020 class NormalizedPoint; // Forward declarations: this header only names these types in
0021 class NormalizedRect;  // function signatures, pointer members and friend declarations,
0022 class Page;            // so their full definitions are not needed here.
0023 class PagePrivate;
0024 class TextPagePrivate;
0025 class TextSelection;
0026 class RegularAreaRect;
0027 
0028 /*! @class TextEntity
0029  * @short Represents a piece of text on a TextPage, containing its textual representation and its bounding box.
0030  *
0031  * To enable searching and text selection, a generator can give information about the textual
0032  * content of a Page using a TextPage.
0033  * A TextPage is built from TextEntity objects.
0034  * A TextEntity can represent a single character/glyph, a word, a line, or even the whole page.
0035  *
0036  * Ideally, every single glyph is represented by its own TextEntity.
0037  * If the textual representation of a graphical glyph contains more than one character,
0038  * the TextEntity must contain the whole string which represents the glyph.
0039  *
0040  * Once the Generator has created the TextPage and it has been added to a Page,
0041  * the text entities are regrouped into words, lines, and paragraphs to optimize search and text selection.
0042  * This way, the Generator does not need to care about the logical order of lines or paragraphs.
0043  *
0044  * @par Text Selection/Highlighting
0045  * A TextEntity is the smallest piece of text that the user can select or that can be highlighted.
0046  * That is, if the TextEntity represents a word, only the whole word can be selected.
0047  * It would not be possible to select a single glyph of the word, because its bounding box is not known.
0048  *
0049  * @see TextPage, Generator
0050  */
0051 class OKULARCORE_EXPORT TextEntity
0052 {
0053 public:
0054     typedef QList<TextEntity *> List; ///< A list of pointers to TextEntity objects.
0055 
0056     /**
0057      * Creates a new text entity with the given @p text and the given @p area.
0058      * NOTE(review): this header does not state who owns @p area afterwards; confirm against the implementation.
0059      */
0060     TextEntity(const QString &text, NormalizedRect *area);
0061 
0062     /**
0063      * Destroys the text entity.
0064      */
0065     ~TextEntity();
0066 
0067     /**
0068      * Returns the text of the text entity, by value.
0069      */
0070     QString text() const;
0071 
0072     /**
0073      * Returns a pointer to the bounding area of the text entity.
0074      */
0075     NormalizedRect *area() const;
0076 
0077     /**
0078      * Returns the area of the text entity transformed by @p matrix, as a new NormalizedRect value.
0079      */
0080     NormalizedRect transformedArea(const QTransform &matrix) const;
0081 
0082 private:
0083     QString m_text;         // Presumably the string handed to the constructor; verify in the implementation.
0084     NormalizedRect *m_area; // Presumably the area handed to the constructor; verify in the implementation.
0085 
0086     class Private; // Only declared here, never defined in this header.
0087     const Private *d; // Opaque pointer to Private; presumably reserved for future extension (confirm).
0088 
0089     Q_DISABLE_COPY(TextEntity) // Instances cannot be copied.
0090 };
0091 
0092 /**
0093  * @short Represents the textual information of a Page. Makes search and text selection possible.
0094  *
0095  * A Generator with text support should add a TextPage to every Page.
0096  * For every piece of text, a TextEntity is added, holding the string representation and the bounding box.
0097  *
0098  * Ideally, every TextEntity describes only one glyph.
0099  * A "glyph" is one character in the graphical representation, but the textual representation may consist of multiple characters (like diacritic modifiers).
0100  *
0101  * When the TextPage is added to the Page, the TextEntity objects are restructured to optimize text selection.
0102  *
0103  * @see TextEntity
0104  */
0105 class OKULARCORE_EXPORT TextPage
0106 {
0107     /// @cond PRIVATE
0108     friend class Page;
0109     friend class PagePrivate;
0110     /// @endcond
0111 
0112 public:
0113     /**
0114      * Defines which characters are added to the result of text(), words() and similar area-based queries.
0115      * @since 0.10 (KDE 4.4)
0116      */
0117     enum TextAreaInclusionBehaviour {
0118         AnyPixelTextAreaInclusionBehaviour,    ///< A character is included in the text() result if any pixel of its bounding box is in the given area
0119         CentralPixelTextAreaInclusionBehaviour ///< A character is included in the text() result if the central pixel of its bounding box is in the given area
0120     };
0121 
0122     /**
0123      * Creates a new, empty text page.
0124      */
0125     TextPage();
0126 
0127     /**
0128      * Creates a new text page with the given @p words.
0129      */
0130     explicit TextPage(const TextEntity::List &words);
0131 
0132     /**
0133      * Destroys the text page.
0134      */
0135     ~TextPage();
0136 
0137     /**
0138      * Appends the given @p text with the given @p area as a new
0139      * @ref TextEntity to the page.
0140      */
0141     void append(const QString &text, NormalizedRect *area);
0142 
0143     /**
0144      * Returns the bounding rect of the text which matches the following criteria,
0145      * or nullptr if the search is not successful.
0146      * NOTE(review): this header does not state who owns the returned area; confirm against callers.
0147      * @param searchID A unique id for this search.
0148      * @param query The search text.
0149      * @param direction The direction of the search (@ref SearchDirection)
0150      * @param caseSensitivity If Qt::CaseSensitive, the search is case sensitive; otherwise
0151      *                        the search is case insensitive.
0152      * @param area If null, the search starts at the beginning of the page; otherwise
0153      *                 right/below the coordinates of the given rect.
0154      */
0155     RegularAreaRect *findText(int searchID, const QString &query, SearchDirection direction, Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *area);
0156 
0157     /**
0158      * Text extraction function. Looks for text in the given @p area.
0159      *
0160      * @return
0161      * - If @p area points to a valid null area, a null string.
0162      * - If @p area is nullptr (the default), the whole page text as a single string.
0163      * - Otherwise, the text which is included by @p area, as a single string.
0164      * Uses AnyPixelTextAreaInclusionBehaviour to decide which characters are included.
0165      */
0166     QString text(const RegularAreaRect *area = nullptr) const;
0167 
0168     /**
0169      * Text extraction function. Looks for text in the given @p area.
0170      * @param b Decides when a character counts as being inside @p area (see TextAreaInclusionBehaviour).
0171      * @return
0172      * - If @p area points to a valid null area, a null string.
0173      * - If @p area is nullptr, the whole page text as a single string.
0174      * - Otherwise, the text which is included by @p area, as a single string.
0175      * @since 0.10 (KDE 4.4)
0176      */
0177     QString text(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const;
0178 
0179     /**
0180      * Text entity extraction function. Similar to text(), but returns
0181      * the words together with their bounding rectangles. Note that
0182      * ownership of the contents of the returned list belongs to the
0183      * caller.
0184      * @since 0.14 (KDE 4.8)
0185      */
0186     TextEntity::List words(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const;
0187 
0188     /**
0189      * Returns the area and text of the word at the given point @p p; @p word is optional and defaults to nullptr.
0190      * Note that ownership of the returned area belongs to the caller.
0191      * @since 0.15 (KDE 4.9)
0192      */
0193     RegularAreaRect *wordAt(const NormalizedPoint &p, QString *word = nullptr) const;
0194 
0195     /**
0196      * Returns the rectangular area of the given @p selection. NOTE(review): ownership of the result is not stated here; confirm.
0197      */
0198     RegularAreaRect *textArea(TextSelection *selection) const;
0199 
0200 private:
0201     TextPagePrivate *const d; // Pointer to the private implementation class; the pointer itself cannot be reseated.
0202 
0203     Q_DISABLE_COPY(TextPage) // Instances cannot be copied.
0204 };
0205 
0206 }
0207 
0208 #endif