// File indexing completed on 2025-01-19 03:52:36
0001 /* ============================================================ 0002 * 0003 * This file is a part of digiKam project 0004 * https://www.digikam.org 0005 * 0006 * Date : 2022-08-26 0007 * Description : OCR Tesseract engine 0008 * 0009 * SPDX-FileCopyrightText: 2008-2024 by Gilles Caulier <caulier dot gilles at gmail dot com> 0010 * SPDX-FileCopyrightText: 2022 by Quoc Hung Tran <quochungtran1999 at gmail dot com> 0011 * 0012 * SPDX-License-Identifier: GPL-2.0-or-later 0013 * 0014 * ============================================================ */ 0015 0016 #include "ocrtesseractengine.h" 0017 0018 // Qt includes 0019 0020 #include <QDir> 0021 #include <QPointer> 0022 #include <QProcess> 0023 #include <QFileInfo> 0024 0025 // local includes 0026 0027 #include "digikam_debug.h" 0028 #include "digikam_globals.h" 0029 #include "localizeselector.h" 0030 0031 namespace DigikamGenericTextConverterPlugin 0032 { 0033 0034 class OcrTesseractEngine::Private 0035 { 0036 0037 public: 0038 0039 Private() 0040 : cancel (false) 0041 { 0042 } 0043 0044 OcrOptions opt; 0045 0046 bool cancel; 0047 0048 QPointer<QProcess> ocrProcess; 0049 0050 QString inputFile; 0051 QString outputFile; 0052 QString ocrResult; 0053 }; 0054 0055 OcrTesseractEngine::OcrTesseractEngine() 0056 : d(new Private()) 0057 { 0058 } 0059 0060 OcrTesseractEngine::~OcrTesseractEngine() 0061 { 0062 delete d->ocrProcess; 0063 delete d; 0064 } 0065 0066 void OcrTesseractEngine::setOcrOptions(const OcrOptions& opt) 0067 { 0068 d->opt = opt; 0069 } 0070 0071 OcrOptions OcrTesseractEngine::ocrOptions() const 0072 { 0073 return d->opt; 0074 } 0075 0076 QString OcrTesseractEngine::inputFile() const 0077 { 0078 return d->inputFile; 0079 } 0080 0081 QString OcrTesseractEngine::outputFile() const 0082 { 0083 return d->outputFile; 0084 } 0085 0086 QString OcrTesseractEngine::outputText() const 0087 { 0088 return d->ocrResult; 0089 } 0090 0091 void OcrTesseractEngine::setInputFile(const QString& filePath) 0092 { 0093 
d->inputFile = filePath; 0094 } 0095 0096 void OcrTesseractEngine::setOutputFile(const QString& filePath) 0097 { 0098 d->outputFile = filePath; 0099 } 0100 0101 int OcrTesseractEngine::runOcrProcess() 0102 { 0103 if (d->cancel) 0104 { 0105 return PROCESS_CANCELED; 0106 } 0107 0108 d->ocrProcess = new QProcess(); 0109 d->ocrProcess->setProcessEnvironment(adjustedEnvironmentForAppImage()); 0110 d->ocrProcess->setProcessChannelMode(QProcess::SeparateChannels); 0111 0112 // ------------------------- IN/OUT ARGUMENTS ------------------------- 0113 0114 QStringList args; 0115 0116 // add configuration image 0117 0118 if (!d->inputFile.isEmpty()) 0119 { 0120 args << d->inputFile; 0121 } 0122 0123 // output base name 0124 0125 QString mess; 0126 0127 args << QLatin1String("stdout"); 0128 0129 // ----------------------------- OPTIONS ----------------------------- 0130 0131 // page Segmentation mode 0132 0133 QString val = d->opt.PsmCodeToValue(static_cast<OcrOptions::PageSegmentationModes>(d->opt.psm)); 0134 0135 if (!val.isEmpty()) 0136 { 0137 args << QLatin1String("--psm") << val; 0138 } 0139 0140 // OCR enginge mode 0141 0142 val = d->opt.OemCodeToValue(static_cast<OcrOptions::EngineModes>(d->opt.oem)); 0143 0144 if (!val.isEmpty()) 0145 { 0146 args << QLatin1String("--oem") << val; 0147 } 0148 0149 // Language 0150 0151 val = d->opt.language; 0152 0153 if (!val.isEmpty()) 0154 { 0155 args << QLatin1String("-l") << val; 0156 } 0157 0158 // dpi 0159 0160 val = QString::fromLatin1("%1").arg(d->opt.dpi); 0161 0162 if (!val.isEmpty()) 0163 { 0164 args << QLatin1String("--dpi") << val; 0165 } 0166 0167 // ------------------ Running tesseract process ------------------ 0168 0169 d->ocrProcess->setWorkingDirectory(QDir::tempPath()); 0170 d->ocrProcess->setProgram(d->opt.tesseractPath); 0171 d->ocrProcess->setArguments(args); 0172 0173 qCDebug(DIGIKAM_GENERAL_LOG) << "Running OCR : " 0174 << d->ocrProcess->program() 0175 << d->ocrProcess->arguments(); 0176 0177 
d->ocrProcess->start(); 0178 0179 if (!d->ocrProcess->waitForStarted(10000)) 0180 { 0181 qCWarning(DIGIKAM_GENERAL_LOG) << "Error starting OCR Process"; 0182 0183 return PROCESS_FAILED; 0184 } 0185 0186 if (!d->ocrProcess->waitForFinished(-1) || 0187 (d->ocrProcess->exitStatus() != QProcess::NormalExit)) 0188 { 0189 if (d->cancel) 0190 { 0191 return PROCESS_CANCELED; 0192 } 0193 0194 qCWarning(DIGIKAM_GENERAL_LOG) << "Error finish OCR Process"; 0195 0196 return PROCESS_FAILED; 0197 } 0198 0199 if (d->ocrProcess->error() != QProcess::UnknownError) 0200 { 0201 qCWarning(DIGIKAM_GENERAL_LOG) << "Text Converter has error" 0202 << d->ocrProcess->error(); 0203 0204 return PROCESS_FAILED; 0205 } 0206 0207 d->ocrResult = QString::fromUtf8(d->ocrProcess->readAllStandardOutput()); 0208 0209 saveOcrResult(); 0210 0211 return PROCESS_COMPLETE; 0212 } 0213 0214 void OcrTesseractEngine::saveOcrResult() 0215 { 0216 MetaEngine::AltLangMap commentsMap; 0217 commentsMap.insert(QLatin1String("x-default"), d->ocrResult); 0218 0219 if (d->opt.isSaveTextFile || d->opt.isSaveXMP) 0220 { 0221 translate(commentsMap, d->opt.translations); 0222 } 0223 0224 if (d->opt.isSaveTextFile) 0225 { 0226 saveTextFile(d->inputFile, d->outputFile, commentsMap); 0227 } 0228 0229 if (d->opt.isSaveXMP) 0230 { 0231 saveXMP(QUrl::fromLocalFile(d->inputFile), commentsMap, d->opt.iface); 0232 } 0233 } 0234 0235 void OcrTesseractEngine::translate(MetaEngine::AltLangMap& commentsMap, 0236 const QStringList& langs) 0237 { 0238 QString text = commentsMap[QLatin1String("x-default")]; 0239 0240 Q_FOREACH (const QString& lg, langs) 0241 { 0242 QString tr; 0243 QString error; 0244 0245 bool b = s_inlineTranslateString(text, lg, tr, error); 0246 0247 if (b) 0248 { 0249 commentsMap.insert(lg, tr); 0250 } 0251 else 0252 { 0253 qCWarning(DIGIKAM_GENERAL_LOG) << "Error while translating in" << lg << ":" << error; 0254 } 0255 } 0256 } 0257 0258 void OcrTesseractEngine::saveTextFile(const QString& inFile, 0259 QString& 
outFile, 0260 const MetaEngine::AltLangMap& commentsMap) 0261 { 0262 Q_FOREACH (const QString& lg, commentsMap.keys()) 0263 { 0264 QFileInfo fi(inFile); 0265 outFile = fi.absolutePath() + 0266 QLatin1String("/") + 0267 (QString::fromLatin1("%1-ocr-%2.txt").arg(fi.fileName()).arg(lg)); 0268 0269 QFile file(outFile); 0270 0271 if (file.open(QIODevice::ReadWrite | QIODevice::Truncate)) 0272 { 0273 QTextStream stream(&file); 0274 stream << commentsMap[lg]; 0275 file.close(); 0276 } 0277 } 0278 } 0279 0280 void OcrTesseractEngine::saveXMP(const QUrl& url, 0281 const MetaEngine::AltLangMap& commentsMap, 0282 DInfoInterface* const iface) 0283 { 0284 CaptionsMap commentsSet; 0285 QString author = QLatin1String("digiKam OCR Text Converter Plugin"); 0286 QDateTime dt = QDateTime::currentDateTime(); 0287 0288 MetaEngine::AltLangMap authorsMap; 0289 MetaEngine::AltLangMap datesMap; 0290 0291 Q_FOREACH (const QString& lg, commentsMap.keys()) 0292 { 0293 datesMap.insert(lg, dt.toString(Qt::ISODate)); 0294 authorsMap.insert(lg, author); 0295 } 0296 0297 commentsSet.setData(commentsMap, authorsMap, QString(), datesMap); 0298 0299 // --- Version using DInfoInterface 0300 0301 DItemInfo item; 0302 item.setCaptions(commentsSet); 0303 iface->setItemInfo(url, item.infoMap()); 0304 } 0305 0306 void OcrTesseractEngine::cancelOcrProcess() 0307 { 0308 d->cancel = true; 0309 0310 if (d->ocrProcess) 0311 { 0312 d->ocrProcess->kill(); 0313 } 0314 } 0315 0316 } // namespace DigikamGenericTextConverterPlugin 0317 0318 #include "moc_ocrtesseractengine.cpp"