File indexing completed on 2024-04-14 05:46:50

0001 /**
0002  * SPDX-FileCopyrightText: 2022 by Alexander Stippich <a.stippich@gmx.net>
0003  *
0004  * SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
0005  */
0006 
0007 #include "OCREngine.h"
0008 
0009 #include "config-skanpage.h"
0010 
0011 #include <KLocalizedString>
0012 #include <QPdfWriter>
0013 
0014 #if OCR_AVAILABLE
0015 #include <tesseract/baseapi.h>
0016 #include <leptonica/allheaders.h>
0017 #include "OCRLanguageModel.h"
0018 #if TESSERACT_MAJOR_VERSION < 5
0019 #include <tesseract/strngs.h>
0020 #include <tesseract/genericvector.h>
0021 #endif
0022 #endif
0023 
0024 #include "skanpage_debug.h"
0025 
/**
 * Private data of OCREngine.
 * The members only exist when the build has OCR support enabled.
 */
class OCREnginePrivate
{
public:
#if OCR_AVAILABLE
    /// Tesseract API instance used for initialization and recognition.
    tesseract::TessBaseAPI m_tesseract;
    /// Model that holds the languages reported by tesseract.
    OCRLanguageModel m_languages;
    /// Pen color used for drawing the recognized text; transparent by default.
    QColor m_penColor{Qt::transparent};
#endif
};
0034 
0035 OCREngine::OCREngine(QObject *parent) : QObject(parent), d(std::make_unique<OCREnginePrivate>())
0036 {
0037 #if OCR_AVAILABLE
0038     if (d->m_tesseract.Init(nullptr, nullptr)) { // Use a default language, not necessarily English
0039         qCDebug(SKANPAGE_LOG) << "Failed tesseract OCR init";
0040         return;
0041     }
0042     d->m_tesseract.SetPageSegMode(tesseract::PSM_AUTO_OSD);
0043 
0044     std::vector<std::string> availableLanguages;
0045 #if TESSERACT_MAJOR_VERSION < 5
0046     GenericVector<STRING> languageVector;
0047     d->m_tesseract.GetAvailableLanguagesAsVector(&languageVector);
0048     for (int i = 0; i < languageVector.size(); i++) {
0049         availableLanguages.push_back(languageVector[i].c_str());
0050     }
0051 #else
0052     d->m_tesseract.GetAvailableLanguagesAsVector(&availableLanguages);
0053 #endif
0054     d->m_languages.setLanguages(availableLanguages);
0055 #endif
0056 }
0057 
0058 OCREngine::~OCREngine()
0059 {
0060 }
0061 
0062 bool OCREngine::available() const
0063 {
0064     return OCR_AVAILABLE;
0065 }
0066 
0067 void OCREngine::InitForOCR()
0068 {
0069 #if OCR_AVAILABLE
0070     if (d->m_tesseract.Init(nullptr, d->m_languages.getLanguagesString().c_str())) {
0071         qCDebug(SKANPAGE_LOG) << "Failed tesseract OCR init";
0072         return;
0073     }
0074 #endif
0075 }
0076 
0077 void OCREngine::setColor(QColor color)
0078 {
0079 #if OCR_AVAILABLE
0080     d->m_penColor = color;
0081 #else
0082     Q_UNUSED(color)
0083 #endif   
0084 }
0085 
0086 OCRLanguageModel *OCREngine::languages() const
0087 {
0088 #if OCR_AVAILABLE
0089     return &d->m_languages;
0090 #else
0091     return nullptr;
0092 #endif
0093 }
0094 
0095 void OCREngine::OCRPage(QPdfWriter &writer, QPainter &painter, const SkanpageUtils::PageProperties &page)
0096 {
0097 #if OCR_AVAILABLE
0098     Pix *image = pixRead(page.temporaryFile->fileName().toStdString().c_str());
0099     d->m_tesseract.SetImage(image);
0100     d->m_tesseract.SetSourceResolution(page.dpi);
0101     d->m_tesseract.Recognize(nullptr);
0102 
0103     tesseract::ResultIterator *it = d->m_tesseract.GetIterator();
0104     tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
0105     painter.setPen(d->m_penColor);
0106 
0107     int baseX1, baseY1, baseX2, baseY2, fontID, pointSize;
0108     bool bold, italic, underlined, monospace, serif, smallcaps;
0109     tesseract::Orientation orientation;
0110     tesseract::WritingDirection direction;
0111     tesseract::TextlineOrder order;
0112     float deskew_angle;
0113 
0114     QFont font;
0115     /* Currently, each word extracted from tesseract is printed
0116      * at it coordinates with transparent color.
0117      * We stretch each word to the bounding box since the actual used
0118      * fonts may be different. */
0119     const QTransform oldTransformation = painter.transform();
0120     if (it != nullptr) {
0121         do {
0122             if (it->Empty(level)) {
0123                 continue;
0124             }
0125             const char* word = it->GetUTF8Text(level);
0126             it->Baseline(level, &baseX1, &baseY1, &baseX2, &baseY2);
0127             it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
0128                                     &serif, &smallcaps, &pointSize, &fontID);
0129             it->Orientation(&orientation, &direction, &order, &deskew_angle);
0130             /* Font attributes other than pointSize do not work
0131              * https://github.com/tesseract-ocr/tesseract/issues/1074 */
0132             if (pointSize <= 0) {
0133                 pointSize = 8;
0134             }
0135             font.setStretch(100);
0136             font.setPointSize(pointSize);
0137             QFontMetrics metrics(font, &writer);
0138             const QString text = QString::fromUtf8(word);
0139             const QRect textBounds = metrics.boundingRect(text);
0140             int stretch = 100;
0141             QTransform transformation;
0142             transformation.translate(baseX1, baseY1);
0143             if (orientation == tesseract::ORIENTATION_PAGE_RIGHT) {
0144                 transformation.rotate(90);
0145                 stretch = static_cast<double>(baseY2 - baseY1) / textBounds.width() * 100;
0146             } else if (orientation == tesseract::ORIENTATION_PAGE_LEFT) {
0147                 transformation.rotate(270);
0148                 stretch = static_cast<double>(baseY1 - baseY2) / textBounds.width() * 100;
0149             } else if (orientation == tesseract::ORIENTATION_PAGE_DOWN) {
0150                 transformation.rotate(180);
0151                 stretch = static_cast<double>(baseX1 - baseX2) / textBounds.width() * 100;
0152             } else {
0153                 stretch = static_cast<double>(baseX2 - baseX1) / textBounds.width() * 100;
0154             }
0155             if (stretch <= 0) {
0156                 stretch = 100;
0157             }
0158             transformation.rotate(deskew_angle);
0159             font.setStretch(stretch);
0160             transformation.translate(-baseX1, -baseY1);
0161             painter.setFont(font);
0162             painter.setTransform(transformation, true);
0163             painter.drawText(baseX1, baseY1, text);
0164             delete[] word;
0165             painter.setTransform(oldTransformation);
0166         }
0167         while (it->Next(level));
0168     }
0169 #else
0170     Q_UNUSED(writer)
0171     Q_UNUSED(painter)
0172     Q_UNUSED(page)
0173 #endif
0174 }