File indexing completed on 2025-01-19 03:52:36

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam project
0004  * https://www.digikam.org
0005  *
0006  * Date        : 2022-08-26
0007  * Description : OCR Tesseract options
0008  *
0009  * SPDX-FileCopyrightText: 2008-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0010  * SPDX-FileCopyrightText: 2022      by Quoc Hung Tran <quochungtran1999 at gmail dot com>
0011  *
0012  * SPDX-License-Identifier: GPL-2.0-or-later
0013  *
0014  * ============================================================ */
0015 
0016 #include "ocroptions.h"
0017 
0018 // KDE includes
0019 
0020 #include <klocalizedstring.h>
0021 
0022 // Local includes
0023 
0024 #include "digikam_debug.h"
0025 
0026 namespace DigikamGenericTextConverterPlugin
0027 {
0028 
0029 OcrOptions::OcrOptions()
0030     : psm             (int(PageSegmentationModes::DEFAULT)),
0031       oem             (int(EngineModes::DEFAULT)),
0032       dpi             (300),
0033       isSaveTextFile  (true),
0034       isSaveXMP       (true),
0035       iface           (nullptr),
0036       multicores      (false)
0037 {
0038 }
0039 
0040 OcrOptions::~OcrOptions()
0041 {
0042 }
0043 
0044 QMap<OcrOptions::PageSegmentationModes, QPair<QString, QString> > OcrOptions::psmNames()
0045 {
0046     QMap<PageSegmentationModes,  QPair<QString, QString> > psm;
0047 
0048     psm[PageSegmentationModes::OSD_ONLY]                                = qMakePair(QLatin1String("OSD only"),
0049                                                                                     i18nc("@info:tooltip",
0050                                                                                           "Orientation and script detection (OSD) only."));
0051     psm[PageSegmentationModes::AUTO_WITH_OSD]                           = qMakePair(QLatin1String("With OSD"),
0052                                                                                     i18nc("@info:tooltip",
0053                                                                                           "Automatic page segmentation with OSD."));
0054     psm[PageSegmentationModes::AUTO_WITH_NO_OSD]                        = qMakePair(QLatin1String("No OSD"),
0055                                                                                     i18nc("@info:tooltip",
0056                                                                                           "Automatic page segmentation, but no OSD, or OCR. "
0057                                                                                           "(not implemented)"));
0058     psm[PageSegmentationModes::DEFAULT]                                 = qMakePair(QLatin1String("Default"),
0059                                                                                     i18nc("@info:tooltip",
0060                                                                                           "Fully automatic page segmentation, but no OSD. "
0061                                                                                           "(Default)."));
0062     psm[PageSegmentationModes::SINGLE_COL_TEXT_OF_VAR_SIZE]             = qMakePair(QLatin1String("Col of text"),
0063                                                                                     i18nc("@info:tooltip",
0064                                                                                           "Assume a single column of text of variable sizes."));
0065     psm[PageSegmentationModes::SINGLE_UNIFORM_BLOCK_OF_VERTICALLY_TEXT] = qMakePair(QLatin1String("Vertically aligned"),
0066                                                                                     i18nc("@info:tooltip",
0067                                                                                           "Assume a single uniform block of vertically "
0068                                                                                           "aligned text."));
0069     psm[PageSegmentationModes::SINGLE_UNIFORM_BLOCK_TEXT]               = qMakePair(QLatin1String("Block"),
0070                                                                                     i18nc("@info:tooltip",
0071                                                                                           "Assume a single uniform block of text."));
0072     psm[PageSegmentationModes::SINGLE_TEXT_LINE]                        = qMakePair(QLatin1String("Line"),
0073                                                                                     i18nc("@info:tooltip",
0074                                                                                           "Treat the image as a single text line."));
0075     psm[PageSegmentationModes::SINGLE_WORD]                             = qMakePair(QLatin1String("Word"),
0076                                                                                     i18nc("@info:tooltip",
0077                                                                                           "Treat the image as a single word."));
0078     psm[PageSegmentationModes::SINGLE_WORD_IN_CIRCLE]                   = qMakePair(QLatin1String("Word in circle"),
0079                                                                                     i18nc("@info:tooltip",
0080                                                                                           "Treat the image as a single word in a circle."));
0081     psm[PageSegmentationModes::SINGLE_CHARACTER]                        = qMakePair(QLatin1String("Character"),
0082                                                                                     i18nc("@info:tooltip",
0083                                                                                           "Treat the image as a single character."));
0084     psm[PageSegmentationModes::SPARSE_TEXT]                             = qMakePair(QLatin1String("Sparse text"),
0085                                                                                     i18nc("@info:tooltip",
0086                                                                                           "Sparse text. Find as much text as possible in no "
0087                                                                                           "particular order."));
0088     psm[PageSegmentationModes::SPARSE_WITH_OSD]                         = qMakePair(QLatin1String("Sparse text + OSD"),
0089                                                                                     i18nc("@info:tooltip",
0090                                                                                           "Sparse text with OSD."));
0091     psm[PageSegmentationModes::RAW_LINE]                                = qMakePair(QLatin1String("Raw line"),
0092                                                                                     i18nc("@info:tooltip",
0093                                                                                           "Raw line. Treat the image as a single text line, "
0094                                                                                           "bypassing hacks that are Tesseract-specific."));
0095 
0096     return psm;
0097 }
0098 
0099 QMap<OcrOptions::EngineModes, QPair<QString, QString> > OcrOptions::oemNames()
0100 {
0101     QMap<EngineModes, QPair<QString, QString> > oem;
0102 
0103     oem[EngineModes::LEGACY_ENGINE_ONLY]    = qMakePair(QLatin1String("Legacy"),
0104                                                         i18nc("@info:tooltip",
0105                                                               "Legacy engine only."));
0106     oem[EngineModes::NEURAL_NETS_LSTM_ONLY] = qMakePair(QLatin1String("LSTM"),
0107                                                         i18nc("@info:tooltip",
0108                                                               "Neural nets LSTM engine only."));
0109     oem[EngineModes::LEGACY_LSTM_ENGINES]   = qMakePair(QLatin1String("Legacy + LSTM"),
0110                                                         i18nc("@info:tooltip",
0111                                                               "Legacy + LSTM engines."));
0112     oem[EngineModes::DEFAULT]               = qMakePair(QLatin1String("Default"),
0113                                                         i18nc("@info:tooltip",
0114                                                               "Default, based on what is available."));
0115 
0116     return oem;
0117 }
0118 
0119 QString OcrOptions::PsmCodeToValue(OcrOptions::PageSegmentationModes psm) const
0120 {
0121    return QString::fromLatin1("%1").arg((int)psm);  // psm tesseract cli values if range from 0 to 13
0122 }
0123 
0124 QString OcrOptions::OemCodeToValue(OcrOptions::EngineModes oem) const
0125 {
0126    return QString::fromLatin1("%1").arg((int)oem);  // oem tesseract cli values if range from 0 to 3
0127 }
0128 
0129 } // namespace DigikamGenericTextConverterPlugin