File indexing completed on 2024-04-28 04:32:47

0001 /*
0002     SPDX-FileCopyrightText: 2005 Piotr Szymanski <niedakh@gmail.com>
0003 
0004     SPDX-License-Identifier: GPL-2.0-or-later
0005 */
0006 
0007 #ifndef _OKULAR_TEXTPAGE_H_
0008 #define _OKULAR_TEXTPAGE_H_
0009 
0010 #include <QList>
0011 #include <QString>
0012 
0013 #include "area.h"
0014 #include "global.h"
0015 #include "okularcore_export.h"
0016 
0017 class QTransform;
0018 
0019 namespace Okular
0020 {
0021 class NormalizedPoint;
0022 class NormalizedRect;
0023 class Page;
0024 class PagePrivate;
0025 class TextPagePrivate;
0026 class TextSelection;
0027 class RegularAreaRect;
0028 
0029 /*! @class TextEntity
0030  * @short Represents a piece of text on a TextPage, containing its textual representation and its bounding box.
0031  *
0032  * To enable searching and text selection, a generator can give information about the textual
0033  * content of a Page using a TextPage.
0034  * A TextPage is created using TextEntity objects.
0035  * A TextEntity can represent a single character/glyph, a word, a line, or even the whole page.
0036  *
0037  * Ideally, every single glyph is represented by its own TextEntity.
0038  * If the textual representation of a graphical glyph contains more than one character,
0039  * the TextEntity must contain the whole string which represents the glyph.
0040  *
0041  * When the Generator has created the TextPage, and it is added to a Page,
0042  * the text entities are reordered to words, lines, and paragraphs, to optimize search and text selection.
0043  * This way, the Generator does not need to care about the logical order of lines or paragraphs.
0044  *
0045  * @par Text Selection/Highlighting
0046  * A TextEntity is the smallest piece of text, which the user can select, or which can be highlighted.
0047  * That is, if the TextEntity represents a word, only the whole word can be selected.
0048  * It would not be possible to select a single glyph of the word, because its bounding box is not known.
0049  *
0050  * @see TextPage, Generator
0051  */
0052 class OKULARCORE_EXPORT TextEntity
0053 {
0054 public:
0055     typedef QList<TextEntity> List; ///< A list holding TextEntity objects by value.
0056 
0057     /**
0058      * Creates a new text entity with the given @p text and the
0059      * given @p area.
0060      */
0061     TextEntity(const QString &text, const NormalizedRect &area);
0062 
0063     /**
0064      * Destroys the text entity.
0065      */
0066     ~TextEntity();
0067 
0068     /**
0069      * Returns the text of the text entity.
0070      */
0071     QString text() const;
0072 
0073     /**
0074      * Returns the bounding area of the text entity.
0075      */
0076     NormalizedRect area() const;
0077 
0078     /**
0079      * Returns the area of the text entity, transformed using the given @p matrix.
0080      */
0081     NormalizedRect transformedArea(const QTransform &matrix) const;
0082 
0083 private:
0084     QString m_text; // Text of this entity.
0085     NormalizedRect m_area; // Bounding area of this entity.
0086 };
0087 
0088 /**
0089  * @short Represents the textual information of a Page. Makes search and text selection possible.
0090  *
0091  * A Generator with text support should add a TextPage to every Page.
0092  * For every piece of text, a TextEntity is added, holding the string representation and the bounding box.
0093  *
0094  * Ideally, every TextEntity describes only one glyph.
0095  * A "glyph" is one character in the graphical representation, but the textual representation may consist of multiple characters (like diacritic modifiers).
0096  *
0097  * When the TextPage is added to the Page, the TextEntity objects are restructured to optimize text selection.
0098  *
0099  * @see TextEntity
0100  */
0101 class OKULARCORE_EXPORT TextPage
0102 {
0103     /// @cond PRIVATE
0104     friend class Page;
0105     friend class PagePrivate;
0106     /// @endcond
0107 
0108 public:
0109     /**
0110      * Defines which characters are added to the result of text().
0111      * @since 0.10 (KDE 4.4)
0112      */
0113     enum TextAreaInclusionBehaviour {
0114         AnyPixelTextAreaInclusionBehaviour,    ///< A character is included into text() result if any pixel of its bounding box is in the given area
0115         CentralPixelTextAreaInclusionBehaviour ///< A character is included into text() result if the central pixel of its bounding box is in the given area
0116     };
0117 
0118     /**
0119      * Creates a new text page.
0120      */
0121     TextPage();
0122 
0123     /**
0124      * Creates a new text page with the given @p words.
0125      */
0126     explicit TextPage(const TextEntity::List &words);
0127 
0128     /**
0129      * Destroys the text page.
0130      */
0131     ~TextPage();
0132 
0133     /**
0134      * Appends the given @p text with the given @p area as new
0135      * @ref TextEntity to the page.
0136      */
0137     void append(const QString &text, const NormalizedRect &area);
0138 
0139     /**
0140      * Returns the bounding rect of the text which matches the following criteria
0141      * or nullptr if the search is not successful.
0142      *
0143      * @param searchID A unique id for this search.
0144      * @param query The search text.
0145      * @param direction The direction of the search (@ref SearchDirection).
0146      * @param caseSensitivity If Qt::CaseSensitive, the search is case sensitive; otherwise
0147      *                        the search is case insensitive.
0148      * @param area If null the search starts at the beginning of the page, otherwise
0149      *                 right/below the coordinates of the given rect.
0150      */
0151     RegularAreaRect *findText(int searchID, const QString &query, SearchDirection direction, Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *area);
0152 
0153     /**
0154      * Text extraction function. Looks for text in the given @p area,
0155      * using AnyPixelTextAreaInclusionBehaviour.
0156      *
0157      * @return
0158      * - If @p area points to a valid null area, a null string.
0159      * - If @p area is nullptr, the whole page text as a single string.
0160      * - Otherwise, the text which is included by @p area, as a single string.
0161      */
0162     QString text(const RegularAreaRect *area = nullptr) const;
0163 
0164     /**
0165      * Text extraction function. Looks for text in the given @p area, using the inclusion behaviour @p b.
0166      *
0167      * @return
0168      * - If @p area points to a valid null area, a null string.
0169      * - If @p area is nullptr, the whole page text as a single string.
0170      * - Otherwise, the text which is included by @p area, as a single string.
0171      * @since 0.10 (KDE 4.4)
0172      */
0173     QString text(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const;
0174 
0175     /**
0176      * Text entity extraction function. Similar to text() but returns
0177      * the words including their bounding rectangles. The entities are
0178      * stored by value in the returned list, so the caller has no
0179      * separate objects to delete.
0180      * @since 0.14 (KDE 4.8)
0181      */
0182     TextEntity::List words(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const;
0183 
0184     /**
0185      * Returns the area of the word at the given point @p p; the word's text is presumably stored in @p word when that is non-null (TODO confirm against the implementation).
0186      * Note that ownership of the returned area belongs to the caller.
0187      * @since 0.15 (KDE 4.9)
0188      */
0189     RegularAreaRect *wordAt(const NormalizedPoint &p, QString *word = nullptr) const;
0190 
0191     /**
0192      * Returns the rectangular area of the given @p selection.
0193      */
0194     RegularAreaRect *textArea(TextSelection *selection) const;
0195 
0196 private:
0197     TextPagePrivate *const d; // Pointer to the TextPagePrivate data (type only forward-declared in this header); the pointer itself is const.
0198 
0199     Q_DISABLE_COPY(TextPage)
0200 };
0201 
0202 }
0203 
0204 #endif