File indexing completed on 2024-04-14 05:46:50
0001 /** 0002 * SPDX-FileCopyrightText: 2022 by Alexander Stippich <a.stippich@gmx.net> 0003 * 0004 * SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL 0005 */ 0006 0007 #include "OCREngine.h" 0008 0009 #include "config-skanpage.h" 0010 0011 #include <KLocalizedString> 0012 #include <QPdfWriter> 0013 0014 #if OCR_AVAILABLE 0015 #include <tesseract/baseapi.h> 0016 #include <leptonica/allheaders.h> 0017 #include "OCRLanguageModel.h" 0018 #if TESSERACT_MAJOR_VERSION < 5 0019 #include <tesseract/strngs.h> 0020 #include <tesseract/genericvector.h> 0021 #endif 0022 #endif 0023 0024 #include "skanpage_debug.h" 0025 0026 class OCREnginePrivate { 0027 public: 0028 #if OCR_AVAILABLE 0029 tesseract::TessBaseAPI m_tesseract; 0030 OCRLanguageModel m_languages; 0031 QColor m_penColor = Qt::transparent; 0032 #endif 0033 }; 0034 0035 OCREngine::OCREngine(QObject *parent) : QObject(parent), d(std::make_unique<OCREnginePrivate>()) 0036 { 0037 #if OCR_AVAILABLE 0038 if (d->m_tesseract.Init(nullptr, nullptr)) { // Use a default language, not necessarily English 0039 qCDebug(SKANPAGE_LOG) << "Failed tesseract OCR init"; 0040 return; 0041 } 0042 d->m_tesseract.SetPageSegMode(tesseract::PSM_AUTO_OSD); 0043 0044 std::vector<std::string> availableLanguages; 0045 #if TESSERACT_MAJOR_VERSION < 5 0046 GenericVector<STRING> languageVector; 0047 d->m_tesseract.GetAvailableLanguagesAsVector(&languageVector); 0048 for (int i = 0; i < languageVector.size(); i++) { 0049 availableLanguages.push_back(languageVector[i].c_str()); 0050 } 0051 #else 0052 d->m_tesseract.GetAvailableLanguagesAsVector(&availableLanguages); 0053 #endif 0054 d->m_languages.setLanguages(availableLanguages); 0055 #endif 0056 } 0057 0058 OCREngine::~OCREngine() 0059 { 0060 } 0061 0062 bool OCREngine::available() const 0063 { 0064 return OCR_AVAILABLE; 0065 } 0066 0067 void OCREngine::InitForOCR() 0068 { 0069 #if OCR_AVAILABLE 0070 if (d->m_tesseract.Init(nullptr, d->m_languages.getLanguagesString().c_str())) { 0071 qCDebug(SKANPAGE_LOG) << "Failed tesseract OCR init"; 0072 return; 0073 } 0074 #endif 0075 } 0076 0077 void OCREngine::setColor(QColor color) 0078 { 0079 #if OCR_AVAILABLE 0080 d->m_penColor = color; 0081 #else 0082 Q_UNUSED(color) 0083 #endif 0084 } 0085 0086 OCRLanguageModel *OCREngine::languages() const 0087 { 0088 #if OCR_AVAILABLE 0089 return &d->m_languages; 0090 #else 0091 return nullptr; 0092 #endif 0093 } 0094 0095 void OCREngine::OCRPage(QPdfWriter &writer, QPainter &painter, const SkanpageUtils::PageProperties &page) 0096 { 0097 #if OCR_AVAILABLE 0098 Pix *image = pixRead(page.temporaryFile->fileName().toStdString().c_str()); 0099 d->m_tesseract.SetImage(image); 0100 d->m_tesseract.SetSourceResolution(page.dpi); 0101 d->m_tesseract.Recognize(nullptr); 0102 0103 tesseract::ResultIterator *it = d->m_tesseract.GetIterator(); 0104 tesseract::PageIteratorLevel level = tesseract::RIL_WORD; 0105 painter.setPen(d->m_penColor); 0106 0107 int baseX1, baseY1, baseX2, baseY2, fontID, pointSize; 0108 bool bold, italic, underlined, monospace, serif, smallcaps; 0109 tesseract::Orientation orientation; 0110 tesseract::WritingDirection direction; 0111 tesseract::TextlineOrder order; 0112 float deskew_angle; 0113 0114 QFont font; 0115 /* Currently, each word extracted from tesseract is printed 0116 * at it coordinates with transparent color. 0117 * We stretch each word to the bounding box since the actual used 0118 * fonts may be different. */ 0119 const QTransform oldTransformation = painter.transform(); 0120 if (it != nullptr) { 0121 do { 0122 if (it->Empty(level)) { 0123 continue; 0124 } 0125 const char* word = it->GetUTF8Text(level); 0126 it->Baseline(level, &baseX1, &baseY1, &baseX2, &baseY2); 0127 it->WordFontAttributes(&bold, &italic, &underlined, &monospace, 0128 &serif, &smallcaps, &pointSize, &fontID); 0129 it->Orientation(&orientation, &direction, &order, &deskew_angle); 0130 /* Font attributes other than pointSize do not work 0131 * https://github.com/tesseract-ocr/tesseract/issues/1074 */ 0132 if (pointSize <= 0) { 0133 pointSize = 8; 0134 } 0135 font.setStretch(100); 0136 font.setPointSize(pointSize); 0137 QFontMetrics metrics(font, &writer); 0138 const QString text = QString::fromUtf8(word); 0139 const QRect textBounds = metrics.boundingRect(text); 0140 int stretch = 100; 0141 QTransform transformation; 0142 transformation.translate(baseX1, baseY1); 0143 if (orientation == tesseract::ORIENTATION_PAGE_RIGHT) { 0144 transformation.rotate(90); 0145 stretch = static_cast<double>(baseY2 - baseY1) / textBounds.width() * 100; 0146 } else if (orientation == tesseract::ORIENTATION_PAGE_LEFT) { 0147 transformation.rotate(270); 0148 stretch = static_cast<double>(baseY1 - baseY2) / textBounds.width() * 100; 0149 } else if (orientation == tesseract::ORIENTATION_PAGE_DOWN) { 0150 transformation.rotate(180); 0151 stretch = static_cast<double>(baseX1 - baseX2) / textBounds.width() * 100; 0152 } else { 0153 stretch = static_cast<double>(baseX2 - baseX1) / textBounds.width() * 100; 0154 } 0155 if (stretch <= 0) { 0156 stretch = 100; 0157 } 0158 transformation.rotate(deskew_angle); 0159 font.setStretch(stretch); 0160 transformation.translate(-baseX1, -baseY1); 0161 painter.setFont(font); 0162 painter.setTransform(transformation, true); 0163 painter.drawText(baseX1, baseY1, text); 0164 delete[] word; 0165 painter.setTransform(oldTransformation); 0166 } 0167 while (it->Next(level)); 0168 } 0169 #else 0170 Q_UNUSED(writer) 0171 Q_UNUSED(painter) 0172 Q_UNUSED(page) 0173 #endif 0174 }