File indexing completed on 2025-01-19 03:52:37

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam project
0004  * https://www.digikam.org
0005  *
0006  * Date        : 2022-08-26
0007  * Description : OCR settings widgets
0008  *
0009  * SPDX-FileCopyrightText: 2008-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0010  * SPDX-FileCopyrightText: 2022      by Quoc Hung Tran <quochungtran1999 at gmail dot com>
0011  *
0012  * SPDX-License-Identifier: GPL-2.0-or-later
0013  *
0014  * ============================================================ */
0015 
0016 #include "textconvertersettings.h"
0017 
0018 // Qt includes
0019 
0020 #include <QGridLayout>
0021 #include <QCheckBox>
0022 
0023 // KDE includes
0024 
0025 #include <klocalizedstring.h>
0026 #include <kconfiggroup.h>
0027 #include <ksharedconfig.h>
0028 
0029 // Local includes
0030 
0031 #include "digikam_debug.h"
0032 #include "dcombobox.h"
0033 #include "dmetadata.h"
0034 #include "dprogresswdg.h"
0035 #include "dexpanderbox.h"
0036 #include "dnuminput.h"
0037 #include "textconverterlist.h"
0038 #include "localizeselector.h"
0039 #include "localizesettings.h"
0040 
0041 using namespace Digikam;
0042 
0043 namespace DigikamGenericTextConverterPlugin
0044 {
0045 
0046 class Q_DECL_HIDDEN TextConverterSettings::Private
0047 {
0048 public:
0049 
0050     explicit Private()
0051       : ocrTesseractLanguageMode(nullptr),
0052         ocrTesseractPSMMode     (nullptr),
0053         ocrTesseractOEMMode     (nullptr),
0054         ocrTesseractDpi         (nullptr),
0055         saveTextFile            (nullptr),
0056         saveXMP                 (nullptr),
0057         localizeList            (nullptr),
0058         multicores              (nullptr)
0059     {
0060     }
0061 
0062     // Tesseract options
0063 
0064     DComboBox*            ocrTesseractLanguageMode;
0065 
0066     DComboBox*            ocrTesseractPSMMode;
0067 
0068     DComboBox*            ocrTesseractOEMMode;
0069 
0070     DIntNumInput*         ocrTesseractDpi;
0071 
0072     QCheckBox*            saveTextFile;
0073 
0074     QCheckBox*            saveXMP;
0075 
0076     LocalizeSelectorList* localizeList;
0077 
0078     QCheckBox*            multicores;
0079 };
0080 
0081 TextConverterSettings::TextConverterSettings(QWidget* const parent)
0082     : QWidget(parent),
0083       d      (new Private)
0084 {
0085     QLabel* const ocrTesseractLanguageLabel   = new QLabel(i18nc("@label", "Languages:"));
0086     d->ocrTesseractLanguageMode               = new DComboBox(this);
0087 
0088     d->ocrTesseractLanguageMode->setDefaultIndex(int(OcrOptions::LanguageModes::DEFAULT));
0089     d->ocrTesseractLanguageMode->setToolTip(i18nc("@info", "Specify the language used for OCR. "
0090                                                            "In the Default mode of Language settings for digital text with multiple languages, \n"
0091                                                            "Tesseract can automatically recognize languages using Latin alphabets such as English or French, \n"
0092                                                            "but is not compatible with languages using hieroglyphs such as Chinese, Japanese.\n"
0093                                                            "You can use the Orientation and Script Detection mode instead or a specific language"
0094                                                            "module if available."));
0095 
0096     // ------------
0097 
0098     QLabel* const ocrTesseractPSMLabel  = new QLabel(i18nc("@label", "Segmentation mode:"));
0099     d->ocrTesseractPSMMode              = new DComboBox(this);
0100 
0101     QMap<OcrOptions::PageSegmentationModes, QPair<QString, QString> >                psmMap = OcrOptions::psmNames();
0102     QMap<OcrOptions::PageSegmentationModes, QPair<QString, QString> >::const_iterator it1   = psmMap.constBegin();
0103 
0104     while (it1 != psmMap.constEnd())
0105     {
0106         d->ocrTesseractPSMMode->addItem(it1.value().first, (int)it1.key());
0107         d->ocrTesseractPSMMode->combo()->setItemData((int)it1.key(), it1.value().second, Qt::ToolTipRole);
0108         ++it1;
0109     }
0110 
0111     d->ocrTesseractPSMMode->setDefaultIndex(int(OcrOptions::PageSegmentationModes::DEFAULT));
0112     d->ocrTesseractPSMMode->setToolTip(i18nc("@info", "Specify page segmentation mode."));
0113 
0114     // ------------
0115 
0116     QLabel* const ocrTesseractOEMLabel  = new QLabel(i18nc("@label", "Engine mode:"));
0117     d->ocrTesseractOEMMode              = new DComboBox(this);
0118 
0119     QMap<OcrOptions::EngineModes, QPair<QString, QString> >                oemMap  = OcrOptions::oemNames();
0120     QMap<OcrOptions::EngineModes, QPair<QString, QString> >::const_iterator it2    = oemMap.constBegin();
0121 
0122     while (it2 !=  oemMap.constEnd())
0123     {
0124         d->ocrTesseractOEMMode->addItem(it2.value().first, (int)it2.key());
0125         d->ocrTesseractOEMMode->combo()->setItemData((int)it2.key(), it2.value().second, Qt::ToolTipRole);
0126         ++it2;
0127     }
0128 
0129     d->ocrTesseractOEMMode->setDefaultIndex(int(OcrOptions::EngineModes::DEFAULT));
0130     d->ocrTesseractOEMMode->setToolTip(i18nc("@info", "Specify OCR engine mode."));
0131 
0132     // ------------
0133 
0134     QLabel* const ocrTesseractDpiLabel  = new QLabel(i18nc("@label", "Resolution Dpi:"));
0135     d->ocrTesseractDpi                  = new DIntNumInput(this);
0136     d->ocrTesseractDpi->setRange(70, 2400, 1);
0137     d->ocrTesseractDpi->setToolTip(i18nc("@info", "Specify DPI for input image."));
0138     d->ocrTesseractDpi->setDefaultValue(300);
0139     ocrTesseractDpiLabel->setBuddy(d->ocrTesseractDpi);
0140 
0141     // ------------
0142 
0143     QLabel* const saveOcrResultLabel = new QLabel(i18nc("@label", "Store result in : "));
0144     d->saveTextFile                  = new QCheckBox(i18nc("@option:check", "Text file"), this);
0145     d->saveTextFile->setToolTip(i18nc("@info", "Store OCR result in separated text file"));
0146     d->saveTextFile->setChecked(true);
0147 
0148     d->saveXMP                       = new QCheckBox(i18nc("@option:check", "Metadata"), this);
0149     d->saveXMP->setToolTip(i18nc("@info", "Store OCR result in XMP metadata"));
0150     d->saveXMP->setChecked(true);
0151 
0152     d->localizeList                  = new LocalizeSelectorList(this);
0153     slotLocalizeChanged();
0154 
0155     d->multicores                    = new QCheckBox(i18nc("@option:check", "Use Multi-cores"), this);
0156     d->multicores->setToolTip(i18nc("@info", "If this option is enabled, files will be processed in parallel"));
0157     d->multicores->setChecked(true);
0158 
0159     // ------------
0160 
0161     QGridLayout* const settingsBoxLayout = new QGridLayout(this);
0162     settingsBoxLayout->addWidget(ocrTesseractLanguageLabel,        0, 0, 1, 1);
0163     settingsBoxLayout->addWidget(d->ocrTesseractLanguageMode,      0, 1, 1, 1);
0164     settingsBoxLayout->addWidget(ocrTesseractPSMLabel,             1, 0, 1, 1);
0165     settingsBoxLayout->addWidget(d->ocrTesseractPSMMode,           1, 1, 1, 1);
0166     settingsBoxLayout->addWidget(ocrTesseractOEMLabel,             2, 0, 1, 1);
0167     settingsBoxLayout->addWidget(d->ocrTesseractOEMMode,           2, 1, 1, 1);
0168     settingsBoxLayout->addWidget(ocrTesseractDpiLabel,             3, 0, 1, 1);
0169     settingsBoxLayout->addWidget(d->ocrTesseractDpi,               3, 1, 1, 1);
0170     settingsBoxLayout->addWidget(d->multicores,                    4, 0, 1, 2);
0171     settingsBoxLayout->addWidget(saveOcrResultLabel,               5, 0, 1, 1);
0172     settingsBoxLayout->addWidget(d->saveTextFile,                  6, 0, 1, 1);
0173     settingsBoxLayout->addWidget(d->saveXMP,                       6, 1, 1, 1);
0174     settingsBoxLayout->addWidget(d->localizeList,                  7, 0, 1, 2);
0175     settingsBoxLayout->setRowStretch(7, 10);
0176     settingsBoxLayout->setContentsMargins(QMargins());
0177 
0178     // ------------------------------------------------------------------------
0179 
0180     connect(d->ocrTesseractLanguageMode, SIGNAL(activated(int)),
0181             this, SIGNAL(signalSettingsChanged()));
0182 
0183     connect(d->ocrTesseractPSMMode, SIGNAL(activated(int)),
0184             this, SIGNAL(signalSettingsChanged()));
0185 
0186     connect(d->ocrTesseractOEMMode, SIGNAL(activated(int)),
0187             this, SIGNAL(signalSettingsChanged()));
0188 
0189     connect(LocalizeSettings::instance(), &LocalizeSettings::signalSettingsChanged,
0190             this, &TextConverterSettings::slotLocalizeChanged);
0191 }
0192 
0193 TextConverterSettings::~TextConverterSettings()
0194 {
0195     delete d;
0196 }
0197 
0198 void TextConverterSettings::setDefaultSettings()
0199 {
0200     d->ocrTesseractLanguageMode->slotReset();
0201     d->ocrTesseractOEMMode->slotReset();
0202     d->ocrTesseractPSMMode->slotReset();
0203     d->ocrTesseractDpi->slotReset();
0204     d->saveTextFile->setChecked(true);
0205     d->localizeList->clearLanguages();
0206     d->multicores->setChecked(false);
0207 }
0208 
0209 void TextConverterSettings::setOcrOptions(const OcrOptions& opt)
0210 {
0211     int id = d->ocrTesseractLanguageMode->combo()->findData(opt.language);
0212 
0213     d->ocrTesseractLanguageMode->setCurrentIndex((id == -1) ? int(OcrOptions::LanguageModes::DEFAULT) : id);
0214     d->ocrTesseractPSMMode->setCurrentIndex(opt.psm);
0215     d->ocrTesseractOEMMode->setCurrentIndex(opt.oem);
0216     d->ocrTesseractDpi->setValue(opt.dpi);
0217     d->saveTextFile->setChecked(opt.isSaveTextFile);
0218     d->saveXMP->setChecked(opt.isSaveXMP);
0219 
0220     Q_FOREACH (const QString& lg, opt.translations)
0221     {
0222         d->localizeList->addLanguage(lg);
0223     }
0224 
0225     d->multicores->setChecked(opt.multicores);
0226 }
0227 
0228 OcrOptions TextConverterSettings::ocrOptions() const
0229 {
0230     OcrOptions opt;
0231 
0232     opt.language       = d->ocrTesseractLanguageMode->combo()->currentData().toString();
0233     opt.psm            = d->ocrTesseractPSMMode->currentIndex();
0234     opt.oem            = d->ocrTesseractOEMMode->currentIndex();
0235     opt.dpi            = d->ocrTesseractDpi->value();
0236     opt.isSaveTextFile = d->saveTextFile->isChecked();
0237     opt.isSaveXMP      = d->saveXMP->isChecked();
0238     opt.translations   = d->localizeList->languagesList();
0239     opt.multicores     = d->multicores->isChecked();
0240 
0241     return opt;
0242 }
0243 
0244 void TextConverterSettings::readSettings()
0245 {
0246     KSharedConfig::Ptr config = KSharedConfig::openConfig();
0247     KConfigGroup group        = config->group(QLatin1String("Text Converter Settings"));
0248     OcrOptions opt;
0249     opt.language       = group.readEntry("OcrLanguages",          QString());
0250     opt.psm            = group.readEntry("PageSegmentationModes", int(OcrOptions::PageSegmentationModes::DEFAULT));
0251     opt.oem            = group.readEntry("EngineModes",           int(OcrOptions::EngineModes::DEFAULT));
0252     opt.dpi            = group.readEntry("Dpi",                   300);
0253     opt.isSaveTextFile = group.readEntry("Check Save Test File",  true);
0254     opt.isSaveXMP      = group.readEntry("Check Save in XMP",     true);
0255     opt.translations   = group.readEntry("Translation Codes",     QStringList());
0256     opt.multicores     = group.readEntry("Multicores",            false);
0257 
0258     setOcrOptions(opt);
0259 }
0260 
0261 void TextConverterSettings::saveSettings()
0262 {
0263     KSharedConfig::Ptr config = KSharedConfig::openConfig();
0264     KConfigGroup group        = config->group(QLatin1String("Text Converter Settings"));
0265     OcrOptions opt            = ocrOptions();
0266 
0267     group.writeEntry("OcrLanguages",              opt.language);
0268     group.writeEntry("PageSegmentationModes",     (int)opt.psm);
0269     group.writeEntry("EngineModes",               (int)opt.oem);
0270     group.writeEntry("Dpi",                       (int)opt.dpi);
0271     group.writeEntry("Check Save Test File",      (bool)opt.isSaveTextFile);
0272     group.writeEntry("Check Save in XMP",         (bool)opt.isSaveXMP);
0273     group.writeEntry("Translation Codes",         opt.translations);
0274     group.writeEntry("Multicores",                (bool)opt.multicores);
0275 
0276     config->sync();
0277 }
0278 
0279 void TextConverterSettings::populateLanguagesMode(const QStringList& langs)
0280 {
0281     if (langs.isEmpty())
0282     {
0283         return;
0284     }
0285 
0286     QStringList tlanguages = langs;
0287 
0288     d->ocrTesseractLanguageMode->insertItem(int(OcrOptions::LanguageModes::DEFAULT),
0289                                             i18nc("@option: default Tesseract mode", "Default"),
0290                                             QString());
0291 
0292     if (tlanguages.contains(QLatin1String("osd")))
0293     {
0294         d->ocrTesseractLanguageMode->insertItem(int(OcrOptions::LanguageModes::OSD),
0295                                                 i18nc("@option: osd Tesseract mode", "Orientation and Script Detection"),
0296                                                 QLatin1String("osd"));
0297         tlanguages.removeAll(QLatin1String("osd"));
0298     }
0299 
0300     d->ocrTesseractLanguageMode->combo()->insertSeparator(d->ocrTesseractLanguageMode->combo()->count() + 1);
0301 
0302     // All others languages are based on 3 letters ISO 639-2
0303 
0304     DMetadata::CountryCodeMap codes = DMetadata::countryCodeMap2();
0305 
0306     Q_FOREACH (const QString& lg, tlanguages)
0307     {
0308          d->ocrTesseractLanguageMode->addItem(codes.value(lg, lg), lg);
0309     }
0310 }
0311 
0312 void TextConverterSettings::slotLocalizeChanged()
0313 {
0314     d->localizeList->setTitle(i18nc("@label", "Translate with %1:",
0315                               DOnlineTranslator::engineName(LocalizeSettings::instance()->settings().translatorEngine)));
0316 }
0317 
0318 } // namespace DigikamGenericTextConverterPlugin
0319 
0320 #include "moc_textconvertersettings.cpp"