File indexing completed on 2024-04-21 15:12:05
/************************************************************************
*                                                                       *
*  This file is part of Kooka, a scanning/OCR application using         *
*  Qt <http://www.qt.io> and KDE Frameworks <http://www.kde.org>.       *
*                                                                       *
*  Copyright (C) 2003-2016 Klaas Freitag <freitag@suse.de>              *
*                          Jonathan Marten <jjm@keelhaul.me.uk>         *
*                                                                       *
*  Kooka is free software; you can redistribute it and/or modify it     *
*  under the terms of the GNU Library General Public License as         *
*  published by the Free Software Foundation and appearing in the       *
*  file COPYING included in the packaging of this file; either          *
*  version 2 of the License, or (at your option) any later version.     *
*                                                                       *
*  As a special exception, permission is given to link this program     *
*  with any version of the KADMOS OCR/ICR engine (a product of          *
*  reRecognition GmbH, Kreuzlingen), and distribute the resulting       *
*  executable without including the source code for KADMOS in the       *
*  source distribution.                                                 *
*                                                                       *
*  This program is distributed in the hope that it will be useful,      *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of       *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the         *
*  GNU General Public License for more details.                         *
*                                                                       *
*  You should have received a copy of the GNU General Public            *
*  License along with this program; see the file COPYING. If            *
*  not, see <http://www.gnu.org/licenses/>.                             *
* 0029 * * 0030 ************************************************************************/ 0031 0032 #include "ocrresedit.h" 0033 0034 #ifdef HAVE_ERRNO_H 0035 #include <errno.h> 0036 #endif 0037 #ifdef HAVE_STRERROR 0038 #include <string.h> 0039 #endif 0040 0041 #include <qcolor.h> 0042 #include <qfile.h> 0043 #include <qtextstream.h> 0044 #include <qfiledialog.h> 0045 0046 #include <klocalizedstring.h> 0047 #include <kmessagebox.h> 0048 0049 #include "abstractocrengine.h" 0050 #include "recentsaver.h" 0051 #include "kooka_logging.h" 0052 0053 // The OCR results are stored in our text document. Each OCR'ed word has 0054 // properties stored in its QTextCharFormat recording the word rectangle 0055 // (if the OCR engine provides this information) and possibly other details 0056 // also. We can read out those properties again to highlight the relevant 0057 // part of the result image when a cursor move or selection is made. 0058 // 0059 // Spell checking mostly uses KTextEdit's built in spell checking support 0060 // (which uses Sonnet). 0061 // 0062 // Caution: if the spell checking dialogue is cancelled, the text format 0063 // properties will be lost - the symptom of this is that the same place in 0064 // the result image will be highlighted no matter where in the text the 0065 // cursor or selection is. This is bug 229150, hopefully fixed in KDE SC 4.5. 0066 0067 OcrResEdit::OcrResEdit(QWidget *parent) 0068 : KTextEdit(parent) 0069 { 0070 setObjectName("OcrResEdit"); 0071 0072 setTabChangesFocus(true); // will never OCR these 0073 slotSetReadOnly(true); // initially, anyway 0074 0075 connect(this, &OcrResEdit::cursorPositionChanged, this, &OcrResEdit::slotUpdateHighlight); 0076 0077 // TODO: monitor textChanged() signal, if document emptied (cleared) 0078 // then tell OCR engine to stop tracking and double clicks 0079 // then ImageCanvas can disable selection if tracking active (because it 0080 // doesn't paint properly). 
0081 } 0082 0083 static void moveForward(QTextCursor &curs, bool once = true) 0084 { 0085 if (once) { 0086 curs.movePosition(QTextCursor::NextCharacter); 0087 } 0088 while (curs.atBlockStart()) { 0089 curs.movePosition(QTextCursor::NextCharacter); 0090 } 0091 } 0092 0093 void OcrResEdit::slotSelectWord(const QPoint &pos) 0094 { 0095 if (document()->isEmpty()) { 0096 return; // nothing to search 0097 } 0098 0099 //qCDebug(KOOKA_LOG) << pos; 0100 0101 QTextCursor curs(document()); // start of document 0102 QRect wordRect; 0103 0104 // First find the start of the word corresponding to the clicked point 0105 0106 moveForward(curs, false); 0107 while (!curs.atEnd()) { 0108 QTextCharFormat fmt = curs.charFormat(); 0109 QRect rect = fmt.property(OcrWordData::Rectangle).toRect(); 0110 ////qCDebug(KOOKA_LOG) << "at" << curs.position() << "rect" << rect; 0111 if (rect.isValid() && rect.contains(pos, true)) { 0112 wordRect = rect; 0113 break; 0114 } 0115 moveForward(curs); 0116 } 0117 0118 //qCDebug(KOOKA_LOG) << "found rect" << wordRect << "at" << curs.position(); 0119 0120 if (!wordRect.isValid()) { 0121 return; // no word found 0122 } 0123 0124 // Then find the end of the word. That is an OCR result word, i.e. a 0125 // span with the same character format, not a text word ended by whitespace. 
0126 0127 QTextCursor wordStart = curs; 0128 QTextCharFormat ref = wordStart.charFormat(); 0129 0130 moveForward(curs); 0131 while (!curs.atEnd()) { 0132 QTextCharFormat fmt = curs.charFormat(); 0133 ////qCDebug(KOOKA_LOG) << "at" << curs.position() << "rect" << fmt.property(OcrWordData::Rectangle).toRect(); 0134 if (fmt != ref) { 0135 ////qCDebug(KOOKA_LOG) << "mismatch at" << curs.position(); 0136 break; 0137 } 0138 moveForward(curs); 0139 } 0140 0141 curs.movePosition(QTextCursor::PreviousCharacter); 0142 //qCDebug(KOOKA_LOG) << "word start" << wordStart.position() << "end" << curs.position(); 0143 int pos1 = wordStart.position(); 0144 int pos2 = curs.position(); 0145 if (pos1 == pos2) { 0146 return; // no word found 0147 } 0148 0149 QTextCursor wc(document()); 0150 wc.setPosition(wordStart.position() - 1, QTextCursor::MoveAnchor); 0151 wc.setPosition(curs.position(), QTextCursor::KeepAnchor); 0152 setTextCursor(wc); 0153 ensureCursorVisible(); 0154 } 0155 0156 void OcrResEdit::slotSaveText() 0157 { 0158 RecentSaver saver("saveOCR"); 0159 QString fileName = QFileDialog::getSaveFileName(this, i18n("Save OCR Result Text"), 0160 saver.recentPath(), i18n("Text File (*.txt)")); 0161 if (fileName.isEmpty()) return; 0162 saver.save(fileName); 0163 0164 QFile file(fileName); 0165 if (!file.open(QIODevice::WriteOnly)) { 0166 QString msg = xi18nc("@info", "Unable to save the OCR results file<nl/><filename>%1</filename>", fileName); 0167 #ifdef HAVE_STRERROR 0168 msg += xi18nc("@info", "<nl/>%1", strerror(errno)); 0169 #endif 0170 KMessageBox::error(this, msg, i18n("Error saving OCR results")); 0171 return; 0172 } 0173 0174 QTextStream stream(&file); 0175 stream << toPlainText(); 0176 file.close(); 0177 } 0178 0179 void OcrResEdit::slotUpdateHighlight() 0180 { 0181 if (isReadOnly()) { 0182 return; 0183 } 0184 ////qCDebug(KOOKA_LOG) << "pos" << textCursor().position() << "hassel" << textCursor().hasSelection() 0185 // << "start" << textCursor().selectionStart() << "end" << 
textCursor().selectionEnd(); 0186 0187 QTextCursor curs = textCursor(); // will not move cursor, see 0188 // QTextEdit::textCursor() doc 0189 if (curs.hasSelection()) { 0190 ////qCDebug(KOOKA_LOG) << "sel start" << curs.selectionStart() << "end" << curs.selectionEnd(); 0191 0192 int send = curs.selectionEnd(); 0193 curs.setPosition(curs.selectionStart()); 0194 curs.movePosition(QTextCursor::NextCharacter); 0195 QTextCharFormat ref = curs.charFormat(); 0196 ////qCDebug(KOOKA_LOG) << "at" << curs.position() << "format rect" << ref.property(OcrWordData::Rectangle).toRect(); 0197 bool same = true; 0198 0199 while (curs.position() != send) { 0200 curs.movePosition(QTextCursor::NextCharacter); 0201 QTextCharFormat fmt = curs.charFormat(); 0202 ////qCDebug(KOOKA_LOG) << "at" << curs.position() << "format rect" << fmt.property(OcrWordData::Rectangle).toRect(); 0203 if (fmt != ref) { 0204 ////qCDebug(KOOKA_LOG) << "mismatch at" << curs.position(); 0205 same = false; 0206 break; 0207 } 0208 } 0209 0210 ////qCDebug(KOOKA_LOG) << "range same format?" << same; 0211 if (same) { // valid word selection 0212 QRect r = ref.property(OcrWordData::Rectangle).toRect(); 0213 ////qCDebug(KOOKA_LOG) << "rect" << r; 0214 emit highlightWord(r); 0215 return; 0216 } 0217 } 0218 0219 emit highlightWord(QRect()); // no valid word selection, 0220 // clear highlight 0221 QTextCharFormat fmt = textCursor().charFormat(); 0222 QRect r = fmt.property(OcrWordData::Rectangle).toRect(); 0223 if (r.isValid()) { 0224 emit scrollToWord(r); // scroll to cursor position 0225 } 0226 } 0227 0228 // QTextEdit::setReadOnly() is no longer a slot in Qt4! 0229 void OcrResEdit::slotSetReadOnly(bool isRO) 0230 { 0231 setReadOnly(isRO); 0232 if (isRO) setCheckSpellingEnabled(false); 0233 }