File indexing completed on 2024-05-12 15:55:08

0001 /************************************************************************
0002  *                                  *
0003  *  This file is part of Kooka, a scanning/OCR application using    *
0004  *  Qt <http://www.qt.io> and KDE Frameworks <http://www.kde.org>.  *
0005  *                                  *
0006  *  Copyright (C) 2000-2016 Klaas Freitag <freitag@suse.de>     *
0007  *                          Jonathan Marten <jjm@keelhaul.me.uk>    *
0008  *                                  *
0009  *  Kooka is free software; you can redistribute it and/or modify it    *
0010  *  under the terms of the GNU Library General Public License as    *
0011  *  published by the Free Software Foundation and appearing in the  *
0012  *  file COPYING included in the packaging of this file;  either    *
0013  *  version 2 of the License, or (at your option) any later version.    *
0014  *                                  *
0015  *  As a special exception, permission is given to link this program    *
0016  *  with any version of the KADMOS OCR/ICR engine (a product of     *
0017  *  reRecognition GmbH, Kreuzlingen), and distribute the resulting  *
0018  *  executable without including the source code for KADMOS in the  *
0019  *  source distribution.                        *
0020  *                                  *
0021  *  This program is distributed in the hope that it will be useful, *
0022  *  but WITHOUT ANY WARRANTY; without even the implied warranty of  *
0023  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the   *
0024  *  GNU General Public License for more details.            *
0025  *                                  *
0026  *  You should have received a copy of the GNU General Public       *
0027  *  License along with this program;  see the file COPYING.  If     *
0028  *  not, see <http://www.gnu.org/licenses/>.                *
0029  *                                  *
0030  ************************************************************************/
0031 
0032 #include "ocrocradengine.h"
0033 
0034 #include <qregexp.h>
0035 #include <qfile.h>
0036 #include <qdir.h>
0037 #include <qfileinfo.h>
0038 #include <qtemporaryfile.h>
0039 #include <qprocess.h>
0040 
0041 #include <klocalizedstring.h>
0042 #include <kpluginfactory.h>
0043 
0044 #include "imageformat.h"
0045 #include "kookasettings.h"
0046 #include "ocrocraddialog.h"
0047 #include "executablepathdialogue.h"
0048 #include "ocr_logging.h"
0049 
0050 
// Plugin entry point: declares the factory class OcrOcradEngineFactory, which
// registers OcrOcradEngine as the plugin object and refers to the metadata
// file "kookaocr-ocrad.json".
0051 K_PLUGIN_FACTORY_WITH_JSON(OcrOcradEngineFactory, "kookaocr-ocrad.json", registerPlugin<OcrOcradEngine>();)
// The moc output is included here, after the factory declaration above.
// NOTE(review): presumably required because the macro declares a QObject
// class inside this source file - confirm against the build setup.
0052 #include "ocrocradengine.moc"
0053 
0054 
// Placeholder used by readORF() as the initial value of each detected
// character; it is what gets appended to the word when the ORF reports
// zero recognition guesses for that character.
0055 static const char UndetectedChar = '_';
0056 
0057 
0058 OcrOcradEngine::OcrOcradEngine(QObject *pnt, const QVariantList &args)
0059     : AbstractOcrEngine(pnt, "OcrOcradEngine")
0060 {
0061     m_ocrImagePBM = QString();
0062     m_tempOrfName = QString();
0063     ocradVersion = 0;
0064 }
0065 
0066 
0067 AbstractOcrDialogue *OcrOcradEngine::createOcrDialogue(AbstractOcrEngine *plugin, QWidget *pnt)
0068 {
0069     return (new OcrOcradDialog(plugin, pnt));
0070 }
0071 
0072 
0073 bool OcrOcradEngine::createOcrProcess(AbstractOcrDialogue *dia, ScanImage::Ptr img)
0074 {
0075     OcrOcradDialog *parentDialog = static_cast<OcrOcradDialog *>(dia);
0076     ocradVersion = parentDialog->getNumVersion();
0077 
0078     const QString cmd = parentDialog->getOCRCmd();
0079 
0080     const QString ocrResultFile = tempSaveImage(img, ImageFormat("BMP"), 8);
0081     setResultImage(ocrResultFile);
0082     // TODO: if the input file is local and is readable by OCRAD,
0083     // can use it directly (but don't delete it afterwards!)
0084     m_ocrImagePBM = tempSaveImage(img, ImageFormat("PBM"), 1);
0085 
0086     QProcess *proc = initOcrProcess();          // start process for OCR
0087     QStringList args;                   // arguments for process
0088 
0089     m_tempOrfName = tempFileName("orf");
0090     args << "-x" << m_tempOrfName;          // the ORF result file
0091 
0092     args << QFile::encodeName(m_ocrImagePBM);       // name of the input image
0093 
0094     // Layout Detection
0095     int layoutMode = KookaSettings::ocrOcradLayoutDetection();
0096     if (ocradVersion >= 18)             // OCRAD 0.18 or later
0097     {                           // has only on/off
0098         if (layoutMode != 0) args << "-l";
0099     }
0100     else                        // OCRAD 0.17 or earlier
0101     {                           // had 3 options
0102         args << "-l" << QString::number(layoutMode);
0103     }
0104 
0105     QString s = KookaSettings::ocrOcradFormat();
0106     if (!s.isEmpty()) args << "-F" << s;
0107 
0108     s = KookaSettings::ocrOcradCharset();
0109     if (!s.isEmpty()) args << "-c" << s;
0110 
0111     s = KookaSettings::ocrOcradFilter();
0112     if (!s.isEmpty()) args << "-e" << s;
0113 
0114     s = KookaSettings::ocrOcradTransform();
0115     if (!s.isEmpty()) args << "-t" << s;
0116 
0117     if (KookaSettings::ocrOcradInvert()) args << "-i";
0118 
0119     if (KookaSettings::ocrOcradThresholdEnable()) {
0120         s = KookaSettings::ocrOcradThresholdValue();
0121         if (!s.isEmpty()) args << "-T" << (s + "%");
0122     }
0123 
0124     if (verboseDebug()) args << "-v";
0125 
0126     s = KookaSettings::ocrOcradExtraArguments();
0127     if (!s.isEmpty()) args << s;
0128 
0129     proc->setProgram(cmd);
0130     proc->setArguments(args);
0131 
0132     proc->setProcessChannelMode(QProcess::SeparateChannels);
0133     m_tempStdoutLog = tempFileName("stdout.log");
0134     proc->setStandardOutputFile(m_tempStdoutLog);
0135 
0136     return (runOcrProcess());
0137 }
0138 
0139 
0140 QStringList OcrOcradEngine::tempFiles(bool retain)
0141 {
0142     QStringList result;
0143     result << m_ocrImagePBM;
0144     result << m_tempOrfName;
0145     result << m_tempStdoutLog;
0146 
0147     return (result);
0148 }
0149 
0150 
0151 bool OcrOcradEngine::finishedOcrProcess(QProcess *proc)
0152 {
0153     qCDebug(OCR_LOG);
0154     QString errStr = readORF(m_tempOrfName);        // parse the OCR results
0155     if (errStr.isEmpty()) return (true);        // parsed successfulyl
0156 
0157     setErrorText(errStr);               // record the parse error
0158     return (false);                 // parsing failed
0159 }
0160 
0161 
0162 /*
0163   From http://kooka.kde.org/news/
0164 
0165 ORF Proposal: Ocr Result File    August 20, 2003
0166 
0167 Ocrad is the first OCR (Optical Character Recognition) application that implements
0168 output of OCR results in a special file format that could be easily processed by
0169 frontend programs.  To provide a proper frontend connection, ocrad implements the
0170 export of the OCR results into a so called ORF, which simply means Ocr Result File.
0171 
0172 The ORF Format is a special file format that contains OCR results like the detected
0173 characters and their position on the source image in a simply parseable format.
0174 Frontend programs can read the file and retrieve information about the OCR engine
0175 run and show up the results visually.
0176 
0177 All lines starting with '#' are ignored.
0178 
0179 The first valid line has the form 'source file filename', where 'filename' is the
0180 name of the PBM file being processed.
0181 
0182 The second valid line has the form 'total blocks n', where 'n' is the total number
0183 of text blocks in the source image.
0184 
0185 For each text block in the source image, the following data follows:
0186 
0187   A line in the form 'block i x y w h', where 'i' is the block number and 'x y w h'
0188   are the block position and size as described below for character boxes.
0189 
0190   A line in the form 'lines n', where 'n' is the number of lines in this block.
0191 
0192 For each line in every text block, the following data follows:
0193 
0194   A line in the form 'line i chars n height h', where 'i' is the line number,
0195   'n' is the number of characters in this line,
0196   and 'h' is the mean height of the characters in this line (in pixels).
0197 
0198   n lines (one for every character) in the form "x y w h b;g[,'c'v]...".
0199   'x' = the left border (x-coordinate) of the char bounding box in the source image (in pixels).
0200   'y' = the top border (y-coordinate).
0201   'w' = the width of the bounding box.
0202   'h' = the height of the bounding box.
0203   'b' = the percent of black pixels in the bounding box.
0204   'g' = the number of different recognition guesses for this character.
0205 
0206   The result characters follow after the number of guesses in the form of a
0207   comma-separated list of pairs. Every pair is formed by the actual recognised
0208   char enclosed in single quotes, followed by the confidence value without
0209   space between them.
0210 
0211 See the following snippet (the beginning of an orf) as a sample ORF:
0212 
0213   # Ocr Results File. Created by GNU ocrad version 0.4
0214   source file test1.pbm
0215   total blocks 1
0216   block 1 0 0 560 792
0217   lines 12
0218   line 1 chars 10 height 26
0219   71 109 17 26;2,'0'1,'o'0
0220   93 109 15 26;2,'1'1,'l'0
0221   110 109 18 26;1,'2'0
0222   131 109 18 26;1,'3'0
0223   151 109 19 26;1,'4'0
0224   172 109 17 26;1,'5'0
0225   193 109 17 26;1,'6'0
0226   213 108 17 27;1,'7'0
0227   232 109 18 26;1,'8'0
0228   253 109 17 26;1,'9'0
0229   line 2 chars 14 height 27
0230   68 153 29 27;1,'A'0
0231   97 153 24 27;1,'B'0
0232   ...
0233 
0234 The ORF format was defined by Antonio Diaz and Klaas Freitag. Comments are very
0235 welcome.
0236 */
0237 
0238 QString OcrOcradEngine::readORF(const QString &fileName)
0239 {
0240     QFile file(fileName);
0241     // some checks on the ORF
0242     if (!file.exists()) {
0243         return (xi18nc("@info", "File <filename>%1</filename> does not exist", fileName));
0244     }
0245     QFileInfo fi(fileName);
0246     if (!fi.isReadable()) {
0247         return (xi18nc("@info", "File <filename>%1</filename> unreadable", fileName));
0248     }
0249 
0250     if (!file.open(QIODevice::ReadOnly)) {
0251         return (xi18nc("@info", "Cannot open file <filename>%1</filename>", fileName));
0252     }
0253     QTextStream stream(&file);
0254 
0255     qCDebug(OCR_LOG) << "Starting to analyse ORF" << fileName << "version" << ocradVersion;
0256 
0257     // to match "block 1 0 0 560 792"
0258     const QRegExp rx1("^.*block\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)");
0259     // to match "line 5 chars 13 height 20"
0260     const QRegExp rx2("^line\\s+(\\d+)\\s+chars\\s+(\\d+)\\s+height\\s+\\d+");
0261     // to match " 1, 'r'0"
0262     const QRegExp rx3("^\\s*(\\d+)");
0263     // to match "110 109 18 26"
0264     const QRegExp rx4("(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)");
0265 
0266     /* use a global line number counter here, not the one from the orf. The orf one
0267      * starts at 0 for every block, but we want line-no counting page global here.
0268      */
0269     int lineNo = 0;
0270     int blockCnt = 0;
0271     QString line;
0272     QRect blockRect;
0273 
0274     startResultDocument();
0275 
0276     while (!stream.atEnd()) {
0277         line = stream.readLine().trimmed();     // line of text excluding '\n'
0278 
0279         if (line.startsWith("#")) {
0280             continue;    // ignore comments
0281         }
0282 
0283         if (verboseDebug()) {
0284             qCDebug(OCR_LOG) << "# Line" << line;
0285         }
0286         if (line.startsWith("source file ")) {
0287             continue;                   // source file name, ignore
0288         } else if (line.startsWith("total blocks ")) {  // total count of blocks,
0289                             // must be first line
0290             blockCnt = line.mid(13).toInt();
0291             qCDebug(OCR_LOG) << "Block count (V<10)" << blockCnt;
0292         } else if (line.startsWith("total text blocks ")) {
0293             blockCnt = line.mid(18).toInt();
0294             qCDebug(OCR_LOG) << "Block count (V>10)" << blockCnt;
0295         } else if (line.startsWith("block ") || line.startsWith("text block ")) {
0296                             // start of text block
0297                             // matching "block 1 0 0 560 792"
0298             if (rx1.indexIn(line) == -1) {
0299                 qCDebug(OCR_LOG) << "Failed to match 'block' line" << line;
0300                 continue;
0301             }
0302 
0303             int currBlock = (rx1.cap(1).toInt()) - 1;
0304             blockRect.setRect(rx1.cap(2).toInt(), rx1.cap(3).toInt(),
0305                               rx1.cap(4).toInt(), rx1.cap(5).toInt());
0306             if (verboseDebug()) qCDebug(OCR_LOG) << "Current block" << currBlock << "rect" << blockRect;
0307         } else if (line.startsWith("lines ")) {     // lines in this block
0308             if (verboseDebug()) qCDebug(OCR_LOG) << "Block line count" << line.mid(6).toInt();
0309         } else if (line.startsWith("line ")) {      // start of text line
0310             startLine();
0311 
0312             if (rx2.indexIn(line) == -1) {
0313                 qCDebug(OCR_LOG) << "Failed to match 'line' line" << line;
0314                 continue;
0315             }
0316 
0317             int charCount = rx2.cap(2).toInt();
0318             if (verboseDebug()) qCDebug(OCR_LOG) << "Expecting" << charCount << "chars for line" << lineNo;
0319 
0320             QString word;
0321             QRect wordRect;
0322 
0323             for (int c = 0; c < charCount && !stream.atEnd(); ++c) {
0324                 // read one line per character
0325                 QString charLine = stream.readLine();
0326                 int semiPos = charLine.indexOf(';');
0327                 if (semiPos == -1) {
0328                     qCDebug(OCR_LOG) << "No ';' in 'char' line" << charLine;
0329                     continue;
0330                 }
0331 
0332                 // rectStr contains the rectangle of the character
0333                 QString rectStr = charLine.left(semiPos);
0334                 // resultStr contains the OCRed result character(s)
0335                 QString resultStr = charLine.mid(semiPos + 1);
0336 
0337                 QChar detectedChar = UndetectedChar;
0338 
0339                 // find how many alternatives, matching " 1, 'r'0"
0340                 if (rx3.indexIn(resultStr) == -1) {
0341                     qCDebug(OCR_LOG) << "Failed to match" << resultStr << "in 'char' line" << charLine;
0342                     continue;
0343                 }
0344 
0345                 int altCount = rx3.cap(1).toInt();
0346                 if (altCount == 0) {            // no alternatives,
0347                             // undecipherable character
0348                     if (verboseDebug()) {
0349                         qCDebug(OCR_LOG) << "Undecipherable character in 'char' line" << charLine;
0350                     }
0351                 } else {
0352                     int h = resultStr.indexOf(',');
0353                     if (h == -1) {
0354                         qCDebug(OCR_LOG) << "No ',' in" << resultStr << "in 'char' line" << charLine;
0355                         continue;
0356                     }
0357                     resultStr = resultStr.remove(0, h + 1).trimmed();
0358 
0359                     // TODO: this only uses the first alternative
0360                     detectedChar = resultStr.at(1);
0361 
0362                     // Analyse the result rectangle
0363                     if (detectedChar != ' ') {
0364                         if (rx4.indexIn(rectStr) == -1) {
0365                             qCDebug(OCR_LOG) << "Failed to match" << rectStr << "in 'char' line" << charLine;
0366                             continue;
0367                         }
0368 
0369                         QRect r(rx4.cap(1).toInt(), rx4.cap(2).toInt(),
0370                                 rx4.cap(3).toInt(), rx4.cap(4).toInt());
0371                         wordRect |= r;
0372                     }
0373                 }
0374 
0375                 if (detectedChar == ' ') {      // space terminates the word
0376                     if (ocradVersion < 10) {        // offset is relative to block
0377                         wordRect.translate(blockRect.x(), blockRect.y());
0378                     }
0379 
0380                     OcrWordData wd;
0381                     wd.setProperty(OcrWordData::Rectangle, wordRect);
0382                     addWord(word, wd);
0383 
0384                     word = QString();           // reset for next time
0385                     wordRect = QRect();
0386                 } else {
0387                     word.append(detectedChar);      // append char to word
0388                 }
0389             }                       // end of text line loop
0390             ++lineNo;
0391 
0392             if (!word.isEmpty()) {          // last word in line
0393                 if (ocradVersion < 10) {        // offset is relative to block
0394                     wordRect.translate(blockRect.x(), blockRect.y());
0395                 }
0396 
0397                 OcrWordData wd;
0398                 wd.setProperty(OcrWordData::Rectangle, wordRect);
0399                 addWord(word, wd);
0400 
0401                 word = QString();           // reset for next time
0402                 wordRect = QRect();
0403             }
0404 
0405             finishLine();
0406         } else {
0407             qCDebug(OCR_LOG) << "Unknown line format" << line;
0408         }
0409     }
0410 
0411     file.close();                   // finished with ORF file
0412     finishResultDocument();
0413     qCDebug(OCR_LOG) << "Finished analysing ORF";
0414 
0415     return (QString());                 // no error detected
0416 }
0417 
0418 
0419 void OcrOcradEngine::openAdvancedSettings()
0420 {
0421     ExecutablePathDialogue d(nullptr);
0422 
0423     QString exec = KookaSettings::ocrOcradBinary();
0424     if (exec.isEmpty())
0425     {
0426         KConfigSkeletonItem *ski = KookaSettings::self()->ocrOcradBinaryItem();
0427         ski->setDefault();
0428         exec = KookaSettings::ocrOcradBinary();
0429     }
0430 
0431     d.setPath(exec);
0432     d.setLabel(i18n("Name or path of the OCRAD executable:"));
0433     if (!d.exec()) return;
0434 
0435     KookaSettings::setOcrOcradBinary(d.path());
0436 }