// File indexing completed on 2024-05-12 15:55:08
0001 /************************************************************************ 0002 * * 0003 * This file is part of Kooka, a scanning/OCR application using * 0004 * Qt <http://www.qt.io> and KDE Frameworks <http://www.kde.org>. * 0005 * * 0006 * Copyright (C) 2000-2016 Klaas Freitag <freitag@suse.de> * 0007 * Jonathan Marten <jjm@keelhaul.me.uk> * 0008 * * 0009 * Kooka is free software; you can redistribute it and/or modify it * 0010 * under the terms of the GNU Library General Public License as * 0011 * published by the Free Software Foundation and appearing in the * 0012 * file COPYING included in the packaging of this file; either * 0013 * version 2 of the License, or (at your option) any later version. * 0014 * * 0015 * As a special exception, permission is given to link this program * 0016 * with any version of the KADMOS OCR/ICR engine (a product of * 0017 * reRecognition GmbH, Kreuzlingen), and distribute the resulting * 0018 * executable without including the source code for KADMOS in the * 0019 * source distribution. * 0020 * * 0021 * This program is distributed in the hope that it will be useful, * 0022 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 0023 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 0024 * GNU General Public License for more details. * 0025 * * 0026 * You should have received a copy of the GNU General Public * 0027 * License along with this program; see the file COPYING. If * 0028 * not, see <http://www.gnu.org/licenses/>. 
* 0029 * * 0030 ************************************************************************/ 0031 0032 #include "ocrocradengine.h" 0033 0034 #include <qregexp.h> 0035 #include <qfile.h> 0036 #include <qdir.h> 0037 #include <qfileinfo.h> 0038 #include <qtemporaryfile.h> 0039 #include <qprocess.h> 0040 0041 #include <klocalizedstring.h> 0042 #include <kpluginfactory.h> 0043 0044 #include "imageformat.h" 0045 #include "kookasettings.h" 0046 #include "ocrocraddialog.h" 0047 #include "executablepathdialogue.h" 0048 #include "ocr_logging.h" 0049 0050 0051 K_PLUGIN_FACTORY_WITH_JSON(OcrOcradEngineFactory, "kookaocr-ocrad.json", registerPlugin<OcrOcradEngine>();) 0052 #include "ocrocradengine.moc" 0053 0054 0055 static const char UndetectedChar = '_'; 0056 0057 0058 OcrOcradEngine::OcrOcradEngine(QObject *pnt, const QVariantList &args) 0059 : AbstractOcrEngine(pnt, "OcrOcradEngine") 0060 { 0061 m_ocrImagePBM = QString(); 0062 m_tempOrfName = QString(); 0063 ocradVersion = 0; 0064 } 0065 0066 0067 AbstractOcrDialogue *OcrOcradEngine::createOcrDialogue(AbstractOcrEngine *plugin, QWidget *pnt) 0068 { 0069 return (new OcrOcradDialog(plugin, pnt)); 0070 } 0071 0072 0073 bool OcrOcradEngine::createOcrProcess(AbstractOcrDialogue *dia, ScanImage::Ptr img) 0074 { 0075 OcrOcradDialog *parentDialog = static_cast<OcrOcradDialog *>(dia); 0076 ocradVersion = parentDialog->getNumVersion(); 0077 0078 const QString cmd = parentDialog->getOCRCmd(); 0079 0080 const QString ocrResultFile = tempSaveImage(img, ImageFormat("BMP"), 8); 0081 setResultImage(ocrResultFile); 0082 // TODO: if the input file is local and is readable by OCRAD, 0083 // can use it directly (but don't delete it afterwards!) 
0084 m_ocrImagePBM = tempSaveImage(img, ImageFormat("PBM"), 1); 0085 0086 QProcess *proc = initOcrProcess(); // start process for OCR 0087 QStringList args; // arguments for process 0088 0089 m_tempOrfName = tempFileName("orf"); 0090 args << "-x" << m_tempOrfName; // the ORF result file 0091 0092 args << QFile::encodeName(m_ocrImagePBM); // name of the input image 0093 0094 // Layout Detection 0095 int layoutMode = KookaSettings::ocrOcradLayoutDetection(); 0096 if (ocradVersion >= 18) // OCRAD 0.18 or later 0097 { // has only on/off 0098 if (layoutMode != 0) args << "-l"; 0099 } 0100 else // OCRAD 0.17 or earlier 0101 { // had 3 options 0102 args << "-l" << QString::number(layoutMode); 0103 } 0104 0105 QString s = KookaSettings::ocrOcradFormat(); 0106 if (!s.isEmpty()) args << "-F" << s; 0107 0108 s = KookaSettings::ocrOcradCharset(); 0109 if (!s.isEmpty()) args << "-c" << s; 0110 0111 s = KookaSettings::ocrOcradFilter(); 0112 if (!s.isEmpty()) args << "-e" << s; 0113 0114 s = KookaSettings::ocrOcradTransform(); 0115 if (!s.isEmpty()) args << "-t" << s; 0116 0117 if (KookaSettings::ocrOcradInvert()) args << "-i"; 0118 0119 if (KookaSettings::ocrOcradThresholdEnable()) { 0120 s = KookaSettings::ocrOcradThresholdValue(); 0121 if (!s.isEmpty()) args << "-T" << (s + "%"); 0122 } 0123 0124 if (verboseDebug()) args << "-v"; 0125 0126 s = KookaSettings::ocrOcradExtraArguments(); 0127 if (!s.isEmpty()) args << s; 0128 0129 proc->setProgram(cmd); 0130 proc->setArguments(args); 0131 0132 proc->setProcessChannelMode(QProcess::SeparateChannels); 0133 m_tempStdoutLog = tempFileName("stdout.log"); 0134 proc->setStandardOutputFile(m_tempStdoutLog); 0135 0136 return (runOcrProcess()); 0137 } 0138 0139 0140 QStringList OcrOcradEngine::tempFiles(bool retain) 0141 { 0142 QStringList result; 0143 result << m_ocrImagePBM; 0144 result << m_tempOrfName; 0145 result << m_tempStdoutLog; 0146 0147 return (result); 0148 } 0149 0150 0151 bool OcrOcradEngine::finishedOcrProcess(QProcess *proc) 
0152 { 0153 qCDebug(OCR_LOG); 0154 QString errStr = readORF(m_tempOrfName); // parse the OCR results 0155 if (errStr.isEmpty()) return (true); // parsed successfulyl 0156 0157 setErrorText(errStr); // record the parse error 0158 return (false); // parsing failed 0159 } 0160 0161 0162 /* 0163 From http://kooka.kde.org/news/ 0164 0165 ORF Proposal: Ocr Result File August 20, 2003 0166 0167 Ocrad is the first OCR (Optical Character Recognition) application that implements 0168 output of OCR results in a special file format that could be easily processed by 0169 frontend programs. To provide a proper frontend connection, ocrad implements the 0170 export of the OCR results into a so called ORF, which simply means Ocr Result File. 0171 0172 The ORF Format is a special file format that contains OCR results like the detected 0173 characters and their position on the source image in a simply parseable format. 0174 Frontend programs can read the file and retrieve information about the OCR engine 0175 run and show up the results visually. 0176 0177 All lines starting with '#' are ignored. 0178 0179 The first valid line has the form 'source file filename', where 'filename' is the 0180 name of the PBM file being processed. 0181 0182 The second valid line has the form 'total blocks n', where 'n' is the total number 0183 of text blocks in the source image. 0184 0185 For each text block in the source image, the following data follows: 0186 0187 A line in the form 'block i x y w h', where 'i' is the block number and 'x y w h' 0188 are the block position and size as described below for character boxes. 0189 0190 A line in the form 'lines n', where 'n' is the number of lines in this block. 0191 0192 For each line in every text block, the following data follows: 0193 0194 A line in the form 'line i chars n height h', where 'i' is the line number, 0195 'n' is the number of characters in this line, 0196 and 'h' is the mean height of the characters in this line (in pixels). 
0197 0198 n lines (one for every character) in the form "x y w h b;g[,'c'v]...". 0199 'x' = the left border (x-coordinate) of the char bounding box in the source image (in pixels). 0200 'y' = the top border (y-coordinate). 0201 'w' = the width of the bounding box. 0202 'h' = the height of the bounding box. 0203 'b' = the percent of black pixels in the bounding box. 0204 'g' = the number of different recognition guesses for this character. 0205 0206 The result characters follow after the number of guesses in the form of a 0207 comma-separated list of pairs. Every pair is formed by the actual recognised 0208 char enclosed in single quotes, followed by the confidence value without 0209 space between them. 0210 0211 See the following snippet (the beginning of an orf) as a sample ORF: 0212 0213 # Ocr Results File. Created by GNU ocrad version 0.4 0214 source file test1.pbm 0215 total blocks 1 0216 block 1 0 0 560 792 0217 lines 12 0218 line 1 chars 10 height 26 0219 71 109 17 26;2,'0'1,'o'0 0220 93 109 15 26;2,'1'1,'l'0 0221 110 109 18 26;1,'2'0 0222 131 109 18 26;1,'3'0 0223 151 109 19 26;1,'4'0 0224 172 109 17 26;1,'5'0 0225 193 109 17 26;1,'6'0 0226 213 108 17 27;1,'7'0 0227 232 109 18 26;1,'8'0 0228 253 109 17 26;1,'9'0 0229 line 2 chars 14 height 27 0230 68 153 29 27;1,'A'0 0231 97 153 24 27;1,'B'0 0232 ... 0233 0234 The ORF format was defined by Antonio Diaz and Klaas Freitag. Comments are very 0235 welcome. 
0236 */ 0237 0238 QString OcrOcradEngine::readORF(const QString &fileName) 0239 { 0240 QFile file(fileName); 0241 // some checks on the ORF 0242 if (!file.exists()) { 0243 return (xi18nc("@info", "File <filename>%1</filename> does not exist", fileName)); 0244 } 0245 QFileInfo fi(fileName); 0246 if (!fi.isReadable()) { 0247 return (xi18nc("@info", "File <filename>%1</filename> unreadable", fileName)); 0248 } 0249 0250 if (!file.open(QIODevice::ReadOnly)) { 0251 return (xi18nc("@info", "Cannot open file <filename>%1</filename>", fileName)); 0252 } 0253 QTextStream stream(&file); 0254 0255 qCDebug(OCR_LOG) << "Starting to analyse ORF" << fileName << "version" << ocradVersion; 0256 0257 // to match "block 1 0 0 560 792" 0258 const QRegExp rx1("^.*block\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)"); 0259 // to match "line 5 chars 13 height 20" 0260 const QRegExp rx2("^line\\s+(\\d+)\\s+chars\\s+(\\d+)\\s+height\\s+\\d+"); 0261 // to match " 1, 'r'0" 0262 const QRegExp rx3("^\\s*(\\d+)"); 0263 // to match "110 109 18 26" 0264 const QRegExp rx4("(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)"); 0265 0266 /* use a global line number counter here, not the one from the orf. The orf one 0267 * starts at 0 for every block, but we want line-no counting page global here. 
0268 */ 0269 int lineNo = 0; 0270 int blockCnt = 0; 0271 QString line; 0272 QRect blockRect; 0273 0274 startResultDocument(); 0275 0276 while (!stream.atEnd()) { 0277 line = stream.readLine().trimmed(); // line of text excluding '\n' 0278 0279 if (line.startsWith("#")) { 0280 continue; // ignore comments 0281 } 0282 0283 if (verboseDebug()) { 0284 qCDebug(OCR_LOG) << "# Line" << line; 0285 } 0286 if (line.startsWith("source file ")) { 0287 continue; // source file name, ignore 0288 } else if (line.startsWith("total blocks ")) { // total count of blocks, 0289 // must be first line 0290 blockCnt = line.mid(13).toInt(); 0291 qCDebug(OCR_LOG) << "Block count (V<10)" << blockCnt; 0292 } else if (line.startsWith("total text blocks ")) { 0293 blockCnt = line.mid(18).toInt(); 0294 qCDebug(OCR_LOG) << "Block count (V>10)" << blockCnt; 0295 } else if (line.startsWith("block ") || line.startsWith("text block ")) { 0296 // start of text block 0297 // matching "block 1 0 0 560 792" 0298 if (rx1.indexIn(line) == -1) { 0299 qCDebug(OCR_LOG) << "Failed to match 'block' line" << line; 0300 continue; 0301 } 0302 0303 int currBlock = (rx1.cap(1).toInt()) - 1; 0304 blockRect.setRect(rx1.cap(2).toInt(), rx1.cap(3).toInt(), 0305 rx1.cap(4).toInt(), rx1.cap(5).toInt()); 0306 if (verboseDebug()) qCDebug(OCR_LOG) << "Current block" << currBlock << "rect" << blockRect; 0307 } else if (line.startsWith("lines ")) { // lines in this block 0308 if (verboseDebug()) qCDebug(OCR_LOG) << "Block line count" << line.mid(6).toInt(); 0309 } else if (line.startsWith("line ")) { // start of text line 0310 startLine(); 0311 0312 if (rx2.indexIn(line) == -1) { 0313 qCDebug(OCR_LOG) << "Failed to match 'line' line" << line; 0314 continue; 0315 } 0316 0317 int charCount = rx2.cap(2).toInt(); 0318 if (verboseDebug()) qCDebug(OCR_LOG) << "Expecting" << charCount << "chars for line" << lineNo; 0319 0320 QString word; 0321 QRect wordRect; 0322 0323 for (int c = 0; c < charCount && !stream.atEnd(); ++c) { 0324 // 
read one line per character 0325 QString charLine = stream.readLine(); 0326 int semiPos = charLine.indexOf(';'); 0327 if (semiPos == -1) { 0328 qCDebug(OCR_LOG) << "No ';' in 'char' line" << charLine; 0329 continue; 0330 } 0331 0332 // rectStr contains the rectangle of the character 0333 QString rectStr = charLine.left(semiPos); 0334 // resultStr contains the OCRed result character(s) 0335 QString resultStr = charLine.mid(semiPos + 1); 0336 0337 QChar detectedChar = UndetectedChar; 0338 0339 // find how many alternatives, matching " 1, 'r'0" 0340 if (rx3.indexIn(resultStr) == -1) { 0341 qCDebug(OCR_LOG) << "Failed to match" << resultStr << "in 'char' line" << charLine; 0342 continue; 0343 } 0344 0345 int altCount = rx3.cap(1).toInt(); 0346 if (altCount == 0) { // no alternatives, 0347 // undecipherable character 0348 if (verboseDebug()) { 0349 qCDebug(OCR_LOG) << "Undecipherable character in 'char' line" << charLine; 0350 } 0351 } else { 0352 int h = resultStr.indexOf(','); 0353 if (h == -1) { 0354 qCDebug(OCR_LOG) << "No ',' in" << resultStr << "in 'char' line" << charLine; 0355 continue; 0356 } 0357 resultStr = resultStr.remove(0, h + 1).trimmed(); 0358 0359 // TODO: this only uses the first alternative 0360 detectedChar = resultStr.at(1); 0361 0362 // Analyse the result rectangle 0363 if (detectedChar != ' ') { 0364 if (rx4.indexIn(rectStr) == -1) { 0365 qCDebug(OCR_LOG) << "Failed to match" << rectStr << "in 'char' line" << charLine; 0366 continue; 0367 } 0368 0369 QRect r(rx4.cap(1).toInt(), rx4.cap(2).toInt(), 0370 rx4.cap(3).toInt(), rx4.cap(4).toInt()); 0371 wordRect |= r; 0372 } 0373 } 0374 0375 if (detectedChar == ' ') { // space terminates the word 0376 if (ocradVersion < 10) { // offset is relative to block 0377 wordRect.translate(blockRect.x(), blockRect.y()); 0378 } 0379 0380 OcrWordData wd; 0381 wd.setProperty(OcrWordData::Rectangle, wordRect); 0382 addWord(word, wd); 0383 0384 word = QString(); // reset for next time 0385 wordRect = QRect(); 0386 } 
else { 0387 word.append(detectedChar); // append char to word 0388 } 0389 } // end of text line loop 0390 ++lineNo; 0391 0392 if (!word.isEmpty()) { // last word in line 0393 if (ocradVersion < 10) { // offset is relative to block 0394 wordRect.translate(blockRect.x(), blockRect.y()); 0395 } 0396 0397 OcrWordData wd; 0398 wd.setProperty(OcrWordData::Rectangle, wordRect); 0399 addWord(word, wd); 0400 0401 word = QString(); // reset for next time 0402 wordRect = QRect(); 0403 } 0404 0405 finishLine(); 0406 } else { 0407 qCDebug(OCR_LOG) << "Unknown line format" << line; 0408 } 0409 } 0410 0411 file.close(); // finished with ORF file 0412 finishResultDocument(); 0413 qCDebug(OCR_LOG) << "Finished analysing ORF"; 0414 0415 return (QString()); // no error detected 0416 } 0417 0418 0419 void OcrOcradEngine::openAdvancedSettings() 0420 { 0421 ExecutablePathDialogue d(nullptr); 0422 0423 QString exec = KookaSettings::ocrOcradBinary(); 0424 if (exec.isEmpty()) 0425 { 0426 KConfigSkeletonItem *ski = KookaSettings::self()->ocrOcradBinaryItem(); 0427 ski->setDefault(); 0428 exec = KookaSettings::ocrOcradBinary(); 0429 } 0430 0431 d.setPath(exec); 0432 d.setLabel(i18n("Name or path of the OCRAD executable:")); 0433 if (!d.exec()) return; 0434 0435 KookaSettings::setOcrOcradBinary(d.path()); 0436 }