File indexing completed on 2025-01-19 03:52:37
0001 /* ============================================================ 0002 * 0003 * This file is a part of digiKam project 0004 * https://www.digikam.org 0005 * 0006 * Date : 2022-08-26 0007 * Description : OCR settings widgets 0008 * 0009 * SPDX-FileCopyrightText: 2008-2024 by Gilles Caulier <caulier dot gilles at gmail dot com> 0010 * SPDX-FileCopyrightText: 2022 by Quoc Hung Tran <quochungtran1999 at gmail dot com> 0011 * 0012 * SPDX-License-Identifier: GPL-2.0-or-later 0013 * 0014 * ============================================================ */ 0015 0016 #include "textconvertersettings.h" 0017 0018 // Qt includes 0019 0020 #include <QGridLayout> 0021 #include <QCheckBox> 0022 0023 // KDE includes 0024 0025 #include <klocalizedstring.h> 0026 #include <kconfiggroup.h> 0027 #include <ksharedconfig.h> 0028 0029 // Local includes 0030 0031 #include "digikam_debug.h" 0032 #include "dcombobox.h" 0033 #include "dmetadata.h" 0034 #include "dprogresswdg.h" 0035 #include "dexpanderbox.h" 0036 #include "dnuminput.h" 0037 #include "textconverterlist.h" 0038 #include "localizeselector.h" 0039 #include "localizesettings.h" 0040 0041 using namespace Digikam; 0042 0043 namespace DigikamGenericTextConverterPlugin 0044 { 0045 0046 class Q_DECL_HIDDEN TextConverterSettings::Private 0047 { 0048 public: 0049 0050 explicit Private() 0051 : ocrTesseractLanguageMode(nullptr), 0052 ocrTesseractPSMMode (nullptr), 0053 ocrTesseractOEMMode (nullptr), 0054 ocrTesseractDpi (nullptr), 0055 saveTextFile (nullptr), 0056 saveXMP (nullptr), 0057 localizeList (nullptr), 0058 multicores (nullptr) 0059 { 0060 } 0061 0062 // Tesseract options 0063 0064 DComboBox* ocrTesseractLanguageMode; 0065 0066 DComboBox* ocrTesseractPSMMode; 0067 0068 DComboBox* ocrTesseractOEMMode; 0069 0070 DIntNumInput* ocrTesseractDpi; 0071 0072 QCheckBox* saveTextFile; 0073 0074 QCheckBox* saveXMP; 0075 0076 LocalizeSelectorList* localizeList; 0077 0078 QCheckBox* multicores; 0079 }; 0080 0081 TextConverterSettings::TextConverterSettings(QWidget* const parent) 0082 : QWidget(parent), 0083 d (new Private) 0084 { 0085 QLabel* const ocrTesseractLanguageLabel = new QLabel(i18nc("@label", "Languages:")); 0086 d->ocrTesseractLanguageMode = new DComboBox(this); 0087 0088 d->ocrTesseractLanguageMode->setDefaultIndex(int(OcrOptions::LanguageModes::DEFAULT)); 0089 d->ocrTesseractLanguageMode->setToolTip(i18nc("@info", "Specify the language used for OCR. " 0090 "In the Default mode of Language settings for digital text with multiple languages, \n" 0091 "Tesseract can automatically recognize languages using Latin alphabets such as English or French, \n" 0092 "but is not compatible with languages using hieroglyphs such as Chinese, Japanese.\n" 0093 "You can use the Orientation and Script Detection mode instead or a specific language" 0094 "module if available.")); 0095 0096 // ------------ 0097 0098 QLabel* const ocrTesseractPSMLabel = new QLabel(i18nc("@label", "Segmentation mode:")); 0099 d->ocrTesseractPSMMode = new DComboBox(this); 0100 0101 QMap<OcrOptions::PageSegmentationModes, QPair<QString, QString> > psmMap = OcrOptions::psmNames(); 0102 QMap<OcrOptions::PageSegmentationModes, QPair<QString, QString> >::const_iterator it1 = psmMap.constBegin(); 0103 0104 while (it1 != psmMap.constEnd()) 0105 { 0106 d->ocrTesseractPSMMode->addItem(it1.value().first, (int)it1.key()); 0107 d->ocrTesseractPSMMode->combo()->setItemData((int)it1.key(), it1.value().second, Qt::ToolTipRole); 0108 ++it1; 0109 } 0110 0111 d->ocrTesseractPSMMode->setDefaultIndex(int(OcrOptions::PageSegmentationModes::DEFAULT)); 0112 d->ocrTesseractPSMMode->setToolTip(i18nc("@info", "Specify page segmentation mode.")); 0113 0114 // ------------ 0115 0116 QLabel* const ocrTesseractOEMLabel = new QLabel(i18nc("@label", "Engine mode:")); 0117 d->ocrTesseractOEMMode = new DComboBox(this); 0118 0119 QMap<OcrOptions::EngineModes, QPair<QString, QString> > oemMap = OcrOptions::oemNames(); 0120 QMap<OcrOptions::EngineModes, QPair<QString, QString> >::const_iterator it2 = oemMap.constBegin(); 0121 0122 while (it2 != oemMap.constEnd()) 0123 { 0124 d->ocrTesseractOEMMode->addItem(it2.value().first, (int)it2.key()); 0125 d->ocrTesseractOEMMode->combo()->setItemData((int)it2.key(), it2.value().second, Qt::ToolTipRole); 0126 ++it2; 0127 } 0128 0129 d->ocrTesseractOEMMode->setDefaultIndex(int(OcrOptions::EngineModes::DEFAULT)); 0130 d->ocrTesseractOEMMode->setToolTip(i18nc("@info", "Specify OCR engine mode.")); 0131 0132 // ------------ 0133 0134 QLabel* const ocrTesseractDpiLabel = new QLabel(i18nc("@label", "Resolution Dpi:")); 0135 d->ocrTesseractDpi = new DIntNumInput(this); 0136 d->ocrTesseractDpi->setRange(70, 2400, 1); 0137 d->ocrTesseractDpi->setToolTip(i18nc("@info", "Specify DPI for input image.")); 0138 d->ocrTesseractDpi->setDefaultValue(300); 0139 ocrTesseractDpiLabel->setBuddy(d->ocrTesseractDpi); 0140 0141 // ------------ 0142 0143 QLabel* const saveOcrResultLabel = new QLabel(i18nc("@label", "Store result in : ")); 0144 d->saveTextFile = new QCheckBox(i18nc("@option:check", "Text file"), this); 0145 d->saveTextFile->setToolTip(i18nc("@info", "Store OCR result in separated text file")); 0146 d->saveTextFile->setChecked(true); 0147 0148 d->saveXMP = new QCheckBox(i18nc("@option:check", "Metadata"), this); 0149 d->saveXMP->setToolTip(i18nc("@info", "Store OCR result in XMP metadata")); 0150 d->saveXMP->setChecked(true); 0151 0152 d->localizeList = new LocalizeSelectorList(this); 0153 slotLocalizeChanged(); 0154 0155 d->multicores = new QCheckBox(i18nc("@option:check", "Use Multi-cores"), this); 0156 d->multicores->setToolTip(i18nc("@info", "If this option is enabled, files will be processed in parallel")); 0157 d->multicores->setChecked(true); 0158 0159 // ------------ 0160 0161 QGridLayout* const settingsBoxLayout = new QGridLayout(this); 0162 settingsBoxLayout->addWidget(ocrTesseractLanguageLabel, 0, 0, 1, 1); 0163 settingsBoxLayout->addWidget(d->ocrTesseractLanguageMode, 0, 1, 1, 1); 0164 settingsBoxLayout->addWidget(ocrTesseractPSMLabel, 1, 0, 1, 1); 0165 settingsBoxLayout->addWidget(d->ocrTesseractPSMMode, 1, 1, 1, 1); 0166 settingsBoxLayout->addWidget(ocrTesseractOEMLabel, 2, 0, 1, 1); 0167 settingsBoxLayout->addWidget(d->ocrTesseractOEMMode, 2, 1, 1, 1); 0168 settingsBoxLayout->addWidget(ocrTesseractDpiLabel, 3, 0, 1, 1); 0169 settingsBoxLayout->addWidget(d->ocrTesseractDpi, 3, 1, 1, 1); 0170 settingsBoxLayout->addWidget(d->multicores, 4, 0, 1, 2); 0171 settingsBoxLayout->addWidget(saveOcrResultLabel, 5, 0, 1, 1); 0172 settingsBoxLayout->addWidget(d->saveTextFile, 6, 0, 1, 1); 0173 settingsBoxLayout->addWidget(d->saveXMP, 6, 1, 1, 1); 0174 settingsBoxLayout->addWidget(d->localizeList, 7, 0, 1, 2); 0175 settingsBoxLayout->setRowStretch(7, 10); 0176 settingsBoxLayout->setContentsMargins(QMargins()); 0177 0178 // ------------------------------------------------------------------------ 0179 0180 connect(d->ocrTesseractLanguageMode, SIGNAL(activated(int)), 0181 this, SIGNAL(signalSettingsChanged())); 0182 0183 connect(d->ocrTesseractPSMMode, SIGNAL(activated(int)), 0184 this, SIGNAL(signalSettingsChanged())); 0185 0186 connect(d->ocrTesseractOEMMode, SIGNAL(activated(int)), 0187 this, SIGNAL(signalSettingsChanged())); 0188 0189 connect(LocalizeSettings::instance(), &LocalizeSettings::signalSettingsChanged, 0190 this, &TextConverterSettings::slotLocalizeChanged); 0191 } 0192 0193 TextConverterSettings::~TextConverterSettings() 0194 { 0195 delete d; 0196 } 0197 0198 void TextConverterSettings::setDefaultSettings() 0199 { 0200 d->ocrTesseractLanguageMode->slotReset(); 0201 d->ocrTesseractOEMMode->slotReset(); 0202 d->ocrTesseractPSMMode->slotReset(); 0203 d->ocrTesseractDpi->slotReset(); 0204 d->saveTextFile->setChecked(true); 0205 d->localizeList->clearLanguages(); 0206 d->multicores->setChecked(false); 0207 } 0208 0209 void TextConverterSettings::setOcrOptions(const OcrOptions& opt) 0210 { 0211 int id = d->ocrTesseractLanguageMode->combo()->findData(opt.language); 0212 0213 d->ocrTesseractLanguageMode->setCurrentIndex((id == -1) ? int(OcrOptions::LanguageModes::DEFAULT) : id); 0214 d->ocrTesseractPSMMode->setCurrentIndex(opt.psm); 0215 d->ocrTesseractOEMMode->setCurrentIndex(opt.oem); 0216 d->ocrTesseractDpi->setValue(opt.dpi); 0217 d->saveTextFile->setChecked(opt.isSaveTextFile); 0218 d->saveXMP->setChecked(opt.isSaveXMP); 0219 0220 Q_FOREACH (const QString& lg, opt.translations) 0221 { 0222 d->localizeList->addLanguage(lg); 0223 } 0224 0225 d->multicores->setChecked(opt.multicores); 0226 } 0227 0228 OcrOptions TextConverterSettings::ocrOptions() const 0229 { 0230 OcrOptions opt; 0231 0232 opt.language = d->ocrTesseractLanguageMode->combo()->currentData().toString(); 0233 opt.psm = d->ocrTesseractPSMMode->currentIndex(); 0234 opt.oem = d->ocrTesseractOEMMode->currentIndex(); 0235 opt.dpi = d->ocrTesseractDpi->value(); 0236 opt.isSaveTextFile = d->saveTextFile->isChecked(); 0237 opt.isSaveXMP = d->saveXMP->isChecked(); 0238 opt.translations = d->localizeList->languagesList(); 0239 opt.multicores = d->multicores->isChecked(); 0240 0241 return opt; 0242 } 0243 0244 void TextConverterSettings::readSettings() 0245 { 0246 KSharedConfig::Ptr config = KSharedConfig::openConfig(); 0247 KConfigGroup group = config->group(QLatin1String("Text Converter Settings")); 0248 OcrOptions opt; 0249 opt.language = group.readEntry("OcrLanguages", QString()); 0250 opt.psm = group.readEntry("PageSegmentationModes", int(OcrOptions::PageSegmentationModes::DEFAULT)); 0251 opt.oem = group.readEntry("EngineModes", int(OcrOptions::EngineModes::DEFAULT)); 0252 opt.dpi = group.readEntry("Dpi", 300); 0253 opt.isSaveTextFile = group.readEntry("Check Save Test File", true); 0254 opt.isSaveXMP = group.readEntry("Check Save in XMP", true); 0255 opt.translations = group.readEntry("Translation Codes", QStringList()); 0256 opt.multicores = group.readEntry("Multicores", false); 0257 0258 setOcrOptions(opt); 0259 } 0260 0261 void TextConverterSettings::saveSettings() 0262 { 0263 KSharedConfig::Ptr config = KSharedConfig::openConfig(); 0264 KConfigGroup group = config->group(QLatin1String("Text Converter Settings")); 0265 OcrOptions opt = ocrOptions(); 0266 0267 group.writeEntry("OcrLanguages", opt.language); 0268 group.writeEntry("PageSegmentationModes", (int)opt.psm); 0269 group.writeEntry("EngineModes", (int)opt.oem); 0270 group.writeEntry("Dpi", (int)opt.dpi); 0271 group.writeEntry("Check Save Test File", (bool)opt.isSaveTextFile); 0272 group.writeEntry("Check Save in XMP", (bool)opt.isSaveXMP); 0273 group.writeEntry("Translation Codes", opt.translations); 0274 group.writeEntry("Multicores", (bool)opt.multicores); 0275 0276 config->sync(); 0277 } 0278 0279 void TextConverterSettings::populateLanguagesMode(const QStringList& langs) 0280 { 0281 if (langs.isEmpty()) 0282 { 0283 return; 0284 } 0285 0286 QStringList tlanguages = langs; 0287 0288 d->ocrTesseractLanguageMode->insertItem(int(OcrOptions::LanguageModes::DEFAULT), 0289 i18nc("@option: default Tesseract mode", "Default"), 0290 QString()); 0291 0292 if (tlanguages.contains(QLatin1String("osd"))) 0293 { 0294 d->ocrTesseractLanguageMode->insertItem(int(OcrOptions::LanguageModes::OSD), 0295 i18nc("@option: osd Tesseract mode", "Orientation and Script Detection"), 0296 QLatin1String("osd")); 0297 tlanguages.removeAll(QLatin1String("osd")); 0298 } 0299 0300 d->ocrTesseractLanguageMode->combo()->insertSeparator(d->ocrTesseractLanguageMode->combo()->count() + 1); 0301 0302 // All others languages are based on 3 letters ISO 639-2 0303 0304 DMetadata::CountryCodeMap codes = DMetadata::countryCodeMap2(); 0305 0306 Q_FOREACH (const QString& lg, tlanguages) 0307 { 0308 d->ocrTesseractLanguageMode->addItem(codes.value(lg, lg), lg); 0309 } 0310 } 0311 0312 void TextConverterSettings::slotLocalizeChanged() 0313 { 0314 d->localizeList->setTitle(i18nc("@label", "Translate with %1:", 0315 DOnlineTranslator::engineName(LocalizeSettings::instance()->settings().translatorEngine))); 0316 } 0317 0318 } // namespace DigikamGenericTextConverterPlugin 0319 0320 #include "moc_textconvertersettings.cpp"