File indexing completed on 2025-01-19 03:52:36

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam project
0004  * https://www.digikam.org
0005  *
0006  * Date        : 2022-08-26
0007  * Description : OCR Tesseract options
0008  *
0009  * SPDX-FileCopyrightText: 2008-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0010  * SPDX-FileCopyrightText: 2022      by Quoc Hung Tran <quochungtran1999 at gmail dot com>
0011  *
0012  * SPDX-License-Identifier: GPL-2.0-or-later
0013  *
0014  * ============================================================ */
0015 
0016 #ifndef TESSERACT_OCR_OPTIONS_H
0017 #define TESSERACT_OCR_OPTIONS_H
0018 
0019 // Qt includes
0020 
0021 #include <QMap>
0022 #include <QString>
0023 
0024 // Local includes
0025 
0026 #include "dinfointerface.h"
0027 
0028 using namespace Digikam;
0029 
0030 namespace DigikamGenericTextConverterPlugin
0031 {
0032 
0033 class OcrOptions
0034 {
0035 
0036 public:
0037 
0038     enum class LanguageModes
0039     {
0040         /**
0041          * In the Default mode of Language settings for digital text with multiple languages,
0042          * Tesseract can automatically recognize languages using Latin alphabets such as English or French,
0043          * but is not compatible with languages using hieroglyphs such as Chinese, Japanese.
0044          */
0045         DEFAULT = 0,
0046 
0047         /**
0048          * Orientation and Script Detection mode remplace Default mode for hieroglyphs languages.
0049          * See comment on https://invent.kde.org/graphics/digikam/-/merge_requests/177#note_522008
0050          */
0051         OSD
0052     };
0053 
0054     enum class PageSegmentationModes
0055     {
0056         OSD_ONLY = 0,
0057         AUTO_WITH_OSD,
0058         AUTO_WITH_NO_OSD,
0059         DEFAULT,
0060         SINGLE_COL_TEXT_OF_VAR_SIZE,
0061         SINGLE_UNIFORM_BLOCK_OF_VERTICALLY_TEXT,
0062         SINGLE_UNIFORM_BLOCK_TEXT,
0063         SINGLE_TEXT_LINE,
0064         SINGLE_WORD,
0065         SINGLE_WORD_IN_CIRCLE,
0066         SINGLE_CHARACTER,
0067         SPARSE_TEXT,
0068         SPARSE_WITH_OSD,
0069         RAW_LINE
0070     };
0071 
0072     enum class EngineModes
0073     {
0074         LEGACY_ENGINE_ONLY = 0,
0075         NEURAL_NETS_LSTM_ONLY,
0076         LEGACY_LSTM_ENGINES,
0077         DEFAULT
0078     };
0079 
0080 public:
0081 
0082     explicit OcrOptions();
0083     ~OcrOptions();
0084 
0085 public:
0086 
0087     static QMap<PageSegmentationModes, QPair<QString, QString> > psmNames();
0088     static QMap<EngineModes,           QPair<QString, QString> > oemNames();
0089 
0090 public:
0091 
0092     QString PsmCodeToValue(PageSegmentationModes psm)   const;
0093     QString OemCodeToValue(EngineModes oem)             const;
0094 
0095 public:
0096 
0097     int             psm;                ///< Page segmentation mode.
0098     int             oem;                ///< OCR Engine mode
0099     int             dpi;                ///< Dot per inch.of input images.
0100     bool            isSaveTextFile;     ///< If true, save recognized text to text file.
0101     bool            isSaveXMP;          ///< If true, save recognized text to image XMP metadata alternative language tags.
0102 
0103     /**
0104      * ISO 639-2 3 letters Language code to use while performing OCR on images.
0105      * https://en.wikipedia.org/wiki/List_of_ISO_639-2_codes
0106      */
0107     QString         language;
0108 
0109     QString         tesseractPath;      ///< Path to tesseract binary program.
0110 
0111     QStringList     translations;       ///< List of translation codes to localize recognized text.
0112 
0113     DInfoInterface* iface;              ///< Host application interface.
0114 
0115     bool            multicores;         ///< Process files in parallel.
0116 };
0117 
0118 } // namespace DigikamGenericTextConverterPlugin
0119 
0120 #endif // TESSERACT_OCR_OPTIONS_H
0121