File indexing completed on 2025-01-19 03:52:36

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam project
0004  * https://www.digikam.org
0005  *
0006  * Date        : 2017-07-04
0007  * Description : Autodetect Tesseract binary program
0008  *
0009  * SPDX-FileCopyrightText: 2017-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0010  *
0011  * SPDX-License-Identifier: GPL-2.0-or-later
0012  *
0013  * ============================================================ */
0014 
0015 #include "tesseractbinary.h"
0016 
0017 // Qt includes
0018 
0019 #include <QProcess>
0020 
0021 // KDE includes
0022 
0023 #include <klocalizedstring.h>
0024 
0025 // Local includes
0026 
0027 #include "digikam_debug.h"
0028 #include "digikam_config.h"
0029 #include "digikam_globals.h"
0030 
0031 namespace DigikamGenericTextConverterPlugin
0032 {
0033 
0034 TesseractBinary::TesseractBinary(QObject* const)
0035     : DBinaryIface(
0036                    QLatin1String("tesseract"),
0037                    QLatin1String("4.0.0"),
0038                    QLatin1String("tesseract "),
0039                    0,
0040                    QLatin1String("Tesseract OCR"),
0041                    QLatin1String("https://github.com/tesseract-ocr/tesseract#installing-tesseract"),
0042                    QLatin1String("TextConverter"),
0043                    QStringList(QLatin1String("--version")),
0044                    i18n("Tesseract is an optical character recognition engine for various operating systems.")
0045                   )
0046 {
0047     setup();
0048 }
0049 
0050 TesseractBinary::~TesseractBinary()
0051 {
0052 }
0053 
0054 QStringList TesseractBinary::tesseractLanguages() const
0055 {
0056     /*
0057      * Output look like this:
0058      *
0059      * tesseract --list-langs
0060      * List of available languages (3):
0061      * eng
0062      * fra
0063      * osd
0064      */
0065 
0066     QStringList langs;
0067     QProcess process;
0068     process.setProcessChannelMode(QProcess::MergedChannels);
0069     process.setProcessEnvironment(adjustedEnvironmentForAppImage());
0070     process.start(path(), QStringList() << QLatin1String("--list-langs"));
0071 
0072     qCDebug(DIGIKAM_GENERAL_LOG) << process.arguments();
0073 
0074     bool val = process.waitForFinished();
0075 
0076     if (val && (process.error() != QProcess::FailedToStart))
0077     {
0078         QString output    = QString::fromUtf8(process.readAllStandardOutput());
0079 
0080 #ifdef Q_OS_WIN
0081 
0082         QStringList lines = output.split(QLatin1String("\r\n"));
0083 
0084 #else
0085 
0086         QStringList lines = output.split(QLatin1Char('\n'));
0087 
0088 #endif
0089 
0090         bool found        = false;
0091 
0092         Q_FOREACH (const QString& l, lines)
0093         {
0094             qCDebug(DIGIKAM_GENERAL_LOG) << l;
0095 
0096             if (!found && l.startsWith(QLatin1String("List of available languages")))
0097             {
0098                 found = true;
0099                 continue;
0100             }
0101 
0102             if (found && !l.isEmpty())
0103             {
0104                 langs << l;
0105             }
0106         }
0107     }
0108 
0109     qCDebug(DIGIKAM_GENERAL_LOG) << "Tesseract Languages:" << langs;
0110 
0111     return langs;
0112 }
0113 
0114 } // namespace DigikamGenericTextConverterPlugin
0115 
0116 #include "moc_tesseractbinary.cpp"