File indexing completed on 2025-01-19 03:52:36

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam project
0004  * https://www.digikam.org
0005  *
0006  * Date        : 2022-08-26
0007  * Description : OCR Tesseract engine
0008  *
0009  * SPDX-FileCopyrightText: 2008-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0010  * SPDX-FileCopyrightText: 2022      by Quoc Hung Tran <quochungtran1999 at gmail dot com>
0011  *
0012  * SPDX-License-Identifier: GPL-2.0-or-later
0013  *
0014  * ============================================================ */
0015 
0016 #include "ocrtesseractengine.h"
0017 
0018 // Qt includes
0019 
0020 #include <QDir>
0021 #include <QPointer>
0022 #include <QProcess>
0023 #include <QFileInfo>
0024 
0025 // local includes
0026 
0027 #include "digikam_debug.h"
0028 #include "digikam_globals.h"
0029 #include "localizeselector.h"
0030 
0031 namespace DigikamGenericTextConverterPlugin
0032 {
0033 
0034 class OcrTesseractEngine::Private
0035 {
0036 
0037 public:
0038 
0039     Private()
0040       : cancel (false)
0041     {
0042     }
0043 
0044     OcrOptions          opt;
0045 
0046     bool                cancel;
0047 
0048     QPointer<QProcess>  ocrProcess;
0049 
0050     QString             inputFile;
0051     QString             outputFile;
0052     QString             ocrResult;
0053 };
0054 
0055 OcrTesseractEngine::OcrTesseractEngine()
0056     : d(new Private())
0057 {
0058 }
0059 
0060 OcrTesseractEngine::~OcrTesseractEngine()
0061 {
0062    delete d->ocrProcess;
0063    delete d;
0064 }
0065 
0066 void OcrTesseractEngine::setOcrOptions(const OcrOptions& opt)
0067 {
0068     d->opt = opt;
0069 }
0070 
0071 OcrOptions OcrTesseractEngine::ocrOptions() const
0072 {
0073     return d->opt;
0074 }
0075 
0076 QString OcrTesseractEngine::inputFile() const
0077 {
0078     return d->inputFile;
0079 }
0080 
0081 QString OcrTesseractEngine::outputFile() const
0082 {
0083     return d->outputFile;
0084 }
0085 
0086 QString OcrTesseractEngine::outputText() const
0087 {
0088     return d->ocrResult;
0089 }
0090 
0091 void OcrTesseractEngine::setInputFile(const QString& filePath)
0092 {
0093     d->inputFile = filePath;
0094 }
0095 
0096 void OcrTesseractEngine::setOutputFile(const QString& filePath)
0097 {
0098     d->outputFile = filePath;
0099 }
0100 
0101 int OcrTesseractEngine::runOcrProcess()
0102 {
0103     if (d->cancel)
0104     {
0105         return PROCESS_CANCELED;
0106     }
0107 
0108     d->ocrProcess = new QProcess();
0109     d->ocrProcess->setProcessEnvironment(adjustedEnvironmentForAppImage());
0110     d->ocrProcess->setProcessChannelMode(QProcess::SeparateChannels);
0111 
0112     // ------------------------- IN/OUT ARGUMENTS -------------------------
0113 
0114     QStringList args;
0115 
0116     // add configuration image
0117 
0118     if (!d->inputFile.isEmpty())
0119     {
0120         args << d->inputFile;
0121     }
0122 
0123     // output base name
0124 
0125     QString mess;
0126 
0127     args << QLatin1String("stdout");
0128 
0129     // ----------------------------- OPTIONS -----------------------------
0130 
0131     // page Segmentation mode
0132 
0133     QString val = d->opt.PsmCodeToValue(static_cast<OcrOptions::PageSegmentationModes>(d->opt.psm));
0134 
0135     if (!val.isEmpty())
0136     {
0137         args << QLatin1String("--psm") << val;
0138     }
0139 
0140     // OCR enginge mode
0141 
0142     val = d->opt.OemCodeToValue(static_cast<OcrOptions::EngineModes>(d->opt.oem));
0143 
0144     if (!val.isEmpty())
0145     {
0146         args << QLatin1String("--oem") << val;
0147     }
0148 
0149     // Language
0150 
0151     val = d->opt.language;
0152 
0153     if (!val.isEmpty())
0154     {
0155         args << QLatin1String("-l") << val;
0156     }
0157 
0158     // dpi
0159 
0160     val = QString::fromLatin1("%1").arg(d->opt.dpi);
0161 
0162     if (!val.isEmpty())
0163     {
0164         args << QLatin1String("--dpi") << val;
0165     }
0166 
0167     // ------------------  Running tesseract process ------------------
0168 
0169     d->ocrProcess->setWorkingDirectory(QDir::tempPath());
0170     d->ocrProcess->setProgram(d->opt.tesseractPath);
0171     d->ocrProcess->setArguments(args);
0172 
0173     qCDebug(DIGIKAM_GENERAL_LOG) << "Running OCR : "
0174                                  << d->ocrProcess->program()
0175                                  << d->ocrProcess->arguments();
0176 
0177     d->ocrProcess->start();
0178 
0179     if (!d->ocrProcess->waitForStarted(10000))
0180     {
0181         qCWarning(DIGIKAM_GENERAL_LOG) << "Error starting OCR Process";
0182 
0183         return PROCESS_FAILED;
0184     }
0185 
0186     if (!d->ocrProcess->waitForFinished(-1)                 ||
0187         (d->ocrProcess->exitStatus() != QProcess::NormalExit))
0188     {
0189         if (d->cancel)
0190         {
0191             return PROCESS_CANCELED;
0192         }
0193 
0194         qCWarning(DIGIKAM_GENERAL_LOG) << "Error finish OCR Process";
0195 
0196         return PROCESS_FAILED;
0197     }
0198 
0199     if (d->ocrProcess->error() != QProcess::UnknownError)
0200     {
0201         qCWarning(DIGIKAM_GENERAL_LOG) << "Text Converter has error"
0202                                        << d->ocrProcess->error();
0203 
0204         return PROCESS_FAILED;
0205     }
0206 
0207     d->ocrResult = QString::fromUtf8(d->ocrProcess->readAllStandardOutput());
0208 
0209     saveOcrResult();
0210 
0211     return PROCESS_COMPLETE;
0212 }
0213 
0214 void OcrTesseractEngine::saveOcrResult()
0215 {
0216     MetaEngine::AltLangMap commentsMap;
0217     commentsMap.insert(QLatin1String("x-default"), d->ocrResult);
0218 
0219     if (d->opt.isSaveTextFile || d->opt.isSaveXMP)
0220     {
0221         translate(commentsMap, d->opt.translations);
0222     }
0223 
0224     if (d->opt.isSaveTextFile)
0225     {
0226         saveTextFile(d->inputFile, d->outputFile, commentsMap);
0227     }
0228 
0229     if (d->opt.isSaveXMP)
0230     {
0231         saveXMP(QUrl::fromLocalFile(d->inputFile), commentsMap, d->opt.iface);
0232     }
0233 }
0234 
0235 void OcrTesseractEngine::translate(MetaEngine::AltLangMap& commentsMap,
0236                                    const QStringList& langs)
0237 {
0238     QString text = commentsMap[QLatin1String("x-default")];
0239 
0240     Q_FOREACH (const QString& lg, langs)
0241     {
0242         QString tr;
0243         QString error;
0244 
0245         bool b = s_inlineTranslateString(text, lg, tr, error);
0246 
0247         if (b)
0248         {
0249             commentsMap.insert(lg, tr);
0250         }
0251         else
0252         {
0253             qCWarning(DIGIKAM_GENERAL_LOG) << "Error while translating in" << lg << ":" << error;
0254         }
0255     }
0256 }
0257 
0258 void OcrTesseractEngine::saveTextFile(const QString& inFile,
0259                                       QString& outFile,
0260                                       const MetaEngine::AltLangMap& commentsMap)
0261 {
0262     Q_FOREACH (const QString& lg, commentsMap.keys())
0263     {
0264         QFileInfo fi(inFile);
0265         outFile = fi.absolutePath()  +
0266                   QLatin1String("/") +
0267                   (QString::fromLatin1("%1-ocr-%2.txt").arg(fi.fileName()).arg(lg));
0268 
0269         QFile file(outFile);
0270 
0271         if (file.open(QIODevice::ReadWrite | QIODevice::Truncate))
0272         {
0273             QTextStream stream(&file);
0274             stream << commentsMap[lg];
0275             file.close();
0276         }
0277     }
0278 }
0279 
0280 void OcrTesseractEngine::saveXMP(const QUrl& url,
0281                                  const MetaEngine::AltLangMap& commentsMap,
0282                                  DInfoInterface* const iface)
0283 {
0284     CaptionsMap commentsSet;
0285     QString   author = QLatin1String("digiKam OCR Text Converter Plugin");
0286     QDateTime dt     = QDateTime::currentDateTime();
0287 
0288     MetaEngine::AltLangMap authorsMap;
0289     MetaEngine::AltLangMap datesMap;
0290 
0291     Q_FOREACH (const QString& lg, commentsMap.keys())
0292     {
0293         datesMap.insert(lg,   dt.toString(Qt::ISODate));
0294         authorsMap.insert(lg, author);
0295     }
0296 
0297     commentsSet.setData(commentsMap, authorsMap, QString(), datesMap);
0298 
0299     // --- Version using DInfoInterface
0300 
0301     DItemInfo item;
0302     item.setCaptions(commentsSet);
0303     iface->setItemInfo(url, item.infoMap());
0304 }
0305 
0306 void OcrTesseractEngine::cancelOcrProcess()
0307 {
0308     d->cancel = true;
0309 
0310     if (d->ocrProcess)
0311     {
0312         d->ocrProcess->kill();
0313     }
0314 }
0315 
0316 } // namespace DigikamGenericTextConverterPlugin
0317 
0318 #include "moc_ocrtesseractengine.cpp"