File indexing completed on 2024-04-21 15:12:05

0001 /************************************************************************
0002  *                                  *
0003  *  This file is part of Kooka, a scanning/OCR application using    *
0004  *  Qt <http://www.qt.io> and KDE Frameworks <http://www.kde.org>.  *
0005  *                                  *
0006  *  Copyright (C) 2003-2016 Klaas Freitag <freitag@suse.de>     *
0007  *                          Jonathan Marten <jjm@keelhaul.me.uk>    *
0008  *                                  *
0009  *  Kooka is free software; you can redistribute it and/or modify it    *
0010  *  under the terms of the GNU Library General Public License as    *
0011  *  published by the Free Software Foundation and appearing in the  *
0012  *  file COPYING included in the packaging of this file;  either    *
0013  *  version 2 of the License, or (at your option) any later version.    *
0014  *                                  *
0015  *  As a special exception, permission is given to link this program    *
0016  *  with any version of the KADMOS OCR/ICR engine (a product of     *
0017  *  reRecognition GmbH, Kreuzlingen), and distribute the resulting  *
0018  *  executable without including the source code for KADMOS in the  *
0019  *  source distribution.                        *
0020  *                                  *
0021  *  This program is distributed in the hope that it will be useful, *
0022  *  but WITHOUT ANY WARRANTY; without even the implied warranty of  *
0023  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the   *
0024  *  GNU General Public License for more details.            *
0025  *                                  *
0026  *  You should have received a copy of the GNU General Public       *
0027  *  License along with this program;  see the file COPYING.  If     *
0028  *  not, see <http://www.gnu.org/licenses/>.                *
0029  *                                  *
0030  ************************************************************************/
0031 
0032 #include "ocrresedit.h"
0033 
0034 #ifdef HAVE_ERRNO_H
0035 #include <errno.h>
0036 #endif
0037 #ifdef HAVE_STRERROR
0038 #include <string.h>
0039 #endif
0040 
0041 #include <qcolor.h>
0042 #include <qfile.h>
0043 #include <qtextstream.h>
0044 #include <qfiledialog.h>
0045 
0046 #include <klocalizedstring.h>
0047 #include <kmessagebox.h>
0048 
0049 #include "abstractocrengine.h"
0050 #include "recentsaver.h"
0051 #include "kooka_logging.h"
0052 
0053 //  The OCR results are stored in our text document.  Each OCR'ed word has
0054 //  properties stored in its QTextCharFormat recording the word rectangle
0055 //  (if the OCR engine provides this information) and possibly other details
0056 //  also.  We can read out those properties again to highlight the relevant
0057 //  part of the result image when a cursor move or selection is made.
0058 //
0059 //  Spell checking mostly uses KTextEdit's built in spell checking support
0060 //  (which uses Sonnet).
0061 //
0062 //  Caution:  if the spell checking dialogue is cancelled, the text format
0063 //  properties will be lost - the symptom of this is that the same place in
0064 //  the result image will be highlighted no matter where in the text the
0065 //  cursor or selection is.  This is bug 229150, hopefully fixed in KDE SC 4.5.
0066 
0067 OcrResEdit::OcrResEdit(QWidget *parent)
0068     : KTextEdit(parent)
0069 {
0070     setObjectName("OcrResEdit");
0071 
0072     setTabChangesFocus(true);               // will never OCR these
0073     slotSetReadOnly(true);              // initially, anyway
0074 
0075     connect(this, &OcrResEdit::cursorPositionChanged, this, &OcrResEdit::slotUpdateHighlight);
0076 
0077 // TODO: monitor textChanged() signal, if document emptied (cleared)
0078 // then tell OCR engine to stop tracking and double clicks
0079 // then ImageCanvas can disable selection if tracking active (because it
0080 // doesn't paint properly).
0081 }
0082 
0083 static void moveForward(QTextCursor &curs, bool once = true)
0084 {
0085     if (once) {
0086         curs.movePosition(QTextCursor::NextCharacter);
0087     }
0088     while (curs.atBlockStart()) {
0089         curs.movePosition(QTextCursor::NextCharacter);
0090     }
0091 }
0092 
0093 void OcrResEdit::slotSelectWord(const QPoint &pos)
0094 {
0095     if (document()->isEmpty()) {
0096         return;    // nothing to search
0097     }
0098 
0099     //qCDebug(KOOKA_LOG) << pos;
0100 
0101     QTextCursor curs(document());           // start of document
0102     QRect wordRect;
0103 
0104     // First find the start of the word corresponding to the clicked point
0105 
0106     moveForward(curs, false);
0107     while (!curs.atEnd()) {
0108         QTextCharFormat fmt = curs.charFormat();
0109         QRect rect = fmt.property(OcrWordData::Rectangle).toRect();
0110         ////qCDebug(KOOKA_LOG) << "at" << curs.position() << "rect" << rect;
0111         if (rect.isValid() && rect.contains(pos, true)) {
0112             wordRect = rect;
0113             break;
0114         }
0115         moveForward(curs);
0116     }
0117 
0118     //qCDebug(KOOKA_LOG) << "found rect" << wordRect << "at" << curs.position();
0119 
0120     if (!wordRect.isValid()) {
0121         return;    // no word found
0122     }
0123 
0124     // Then find the end of the word.  That is an OCR result word, i.e. a
0125     // span with the same character format, not a text word ended by whitespace.
0126 
0127     QTextCursor wordStart = curs;
0128     QTextCharFormat ref = wordStart.charFormat();
0129 
0130     moveForward(curs);
0131     while (!curs.atEnd()) {
0132         QTextCharFormat fmt = curs.charFormat();
0133         ////qCDebug(KOOKA_LOG) << "at" << curs.position() << "rect" << fmt.property(OcrWordData::Rectangle).toRect();
0134         if (fmt != ref) {
0135             ////qCDebug(KOOKA_LOG) << "mismatch at" << curs.position();
0136             break;
0137         }
0138         moveForward(curs);
0139     }
0140 
0141     curs.movePosition(QTextCursor::PreviousCharacter);
0142     //qCDebug(KOOKA_LOG) << "word start" << wordStart.position() << "end" << curs.position();
0143     int pos1 = wordStart.position();
0144     int pos2 = curs.position();
0145     if (pos1 == pos2) {
0146         return;    // no word found
0147     }
0148 
0149     QTextCursor wc(document());
0150     wc.setPosition(wordStart.position() - 1, QTextCursor::MoveAnchor);
0151     wc.setPosition(curs.position(), QTextCursor::KeepAnchor);
0152     setTextCursor(wc);
0153     ensureCursorVisible();
0154 }
0155 
0156 void OcrResEdit::slotSaveText()
0157 {
0158     RecentSaver saver("saveOCR");
0159     QString fileName = QFileDialog::getSaveFileName(this, i18n("Save OCR Result Text"),
0160                                                     saver.recentPath(), i18n("Text File (*.txt)"));
0161     if (fileName.isEmpty()) return;
0162     saver.save(fileName);
0163 
0164     QFile file(fileName);
0165     if (!file.open(QIODevice::WriteOnly)) {
0166         QString msg = xi18nc("@info", "Unable to save the OCR results file<nl/><filename>%1</filename>", fileName);
0167 #ifdef HAVE_STRERROR
0168         msg += xi18nc("@info", "<nl/>%1", strerror(errno));
0169 #endif
0170         KMessageBox::error(this, msg, i18n("Error saving OCR results"));
0171         return;
0172     }
0173 
0174     QTextStream stream(&file);
0175     stream << toPlainText();
0176     file.close();
0177 }
0178 
0179 void OcrResEdit::slotUpdateHighlight()
0180 {
0181     if (isReadOnly()) {
0182         return;
0183     }
0184     ////qCDebug(KOOKA_LOG) << "pos" << textCursor().position() << "hassel" << textCursor().hasSelection()
0185     //         << "start" << textCursor().selectionStart() << "end" << textCursor().selectionEnd();
0186 
0187     QTextCursor curs = textCursor();            // will not move cursor, see
0188                             // QTextEdit::textCursor() doc
0189     if (curs.hasSelection()) {
0190         ////qCDebug(KOOKA_LOG) << "sel start" << curs.selectionStart() << "end" << curs.selectionEnd();
0191 
0192         int send = curs.selectionEnd();
0193         curs.setPosition(curs.selectionStart());
0194         curs.movePosition(QTextCursor::NextCharacter);
0195         QTextCharFormat ref = curs.charFormat();
0196         ////qCDebug(KOOKA_LOG) << "at" << curs.position() << "format rect" << ref.property(OcrWordData::Rectangle).toRect();
0197         bool same = true;
0198 
0199         while (curs.position() != send) {
0200             curs.movePosition(QTextCursor::NextCharacter);
0201             QTextCharFormat fmt = curs.charFormat();
0202             ////qCDebug(KOOKA_LOG) << "at" << curs.position() << "format rect" << fmt.property(OcrWordData::Rectangle).toRect();
0203             if (fmt != ref) {
0204                 ////qCDebug(KOOKA_LOG) << "mismatch at" << curs.position();
0205                 same = false;
0206                 break;
0207             }
0208         }
0209 
0210         ////qCDebug(KOOKA_LOG) << "range same format?" << same;
0211         if (same) {                 // valid word selection
0212             QRect r = ref.property(OcrWordData::Rectangle).toRect();
0213             ////qCDebug(KOOKA_LOG) << "rect" << r;
0214             emit highlightWord(r);
0215             return;
0216         }
0217     }
0218 
0219     emit highlightWord(QRect());            // no valid word selection,
0220     // clear highlight
0221     QTextCharFormat fmt = textCursor().charFormat();
0222     QRect r = fmt.property(OcrWordData::Rectangle).toRect();
0223     if (r.isValid()) {
0224         emit scrollToWord(r);    // scroll to cursor position
0225     }
0226 }
0227 
0228 // QTextEdit::setReadOnly() is no longer a slot in Qt4!
0229 void OcrResEdit::slotSetReadOnly(bool isRO)
0230 {
0231     setReadOnly(isRO);
0232     if (isRO) setCheckSpellingEnabled(false);
0233 }