File indexing completed on 2024-12-29 04:50:59

0001 /*
0002     SPDX-FileCopyrightText: 2019 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
#include "pdfextractoroutputdevice_p.h"
#include "pdfbarcodeutil_p.h"
#include "pdfimage.h"
#include "pdfimage_p.h"
#include "popplerutils_p.h"

#include <Annot.h>
#include <Link.h>
#include <Page.h>

#include <QDebug>

#include <cmath>
0018 
0019 using namespace KItinerary;
0020 
0021 PdfExtractorOutputDevice::PdfExtractorOutputDevice()
0022     : TextOutputDev(nullptr, false, 0, false, false)
0023 {
0024 }
0025 
0026 void PdfExtractorOutputDevice::addRasterImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, PdfImageType type)
0027 {
0028     if ((!colorMap && type == PdfImageType::Image) || (colorMap && !colorMap->isOk()) || (ref && !ref->isRef()) || (!ref && !str)) {
0029         return;
0030     }
0031 
0032     QImage::Format format;
0033     if (!colorMap && type != PdfImageType::Image) {
0034         format = QImage::Format_Mono;
0035     } else if (colorMap->getColorSpace()->getMode() == csIndexed) {
0036         format = QImage::Format_RGB888;
0037     } else if (colorMap->getNumPixelComps() == 1 && (colorMap->getBits() >= 1 && colorMap->getBits() <= 8)) {
0038         format = QImage::Format_Grayscale8;
0039     } else if (colorMap->getNumPixelComps() == 3 && colorMap->getBits() == 8) {
0040         format = QImage::Format_RGB888;
0041     } else {
0042         return;
0043     }
0044 
0045     PdfImage pdfImg;
0046     if (ref) {
0047         pdfImg.d->m_ref = PdfImageRef(ref->getRef().num, ref->getRef().gen, type);
0048     }
0049 
0050 #if KPOPPLER_VERSION >= QT_VERSION_CHECK(0, 69, 0)
0051     if (colorMap) {
0052         pdfImg.d->m_colorMap.reset(colorMap->copy());
0053     }
0054 #endif
0055     pdfImg.d->m_sourceHeight = height;
0056     pdfImg.d->m_sourceWidth = width;
0057     pdfImg.d->m_width = width;
0058     pdfImg.d->m_height = height;
0059     // deal with aspect-ratio changing scaling
0060     const auto sourceAspectRatio = (double)width / (double)height;
0061     const auto targetAspectRatio = std::abs(state->getCTM()[0] / -state->getCTM()[3]);
0062     if (!qFuzzyCompare(sourceAspectRatio, targetAspectRatio) && qFuzzyIsNull(state->getCTM()[1]) && qFuzzyIsNull(state->getCTM()[2])) {
0063         if (targetAspectRatio > sourceAspectRatio) {
0064             pdfImg.d->m_width = width * targetAspectRatio / sourceAspectRatio;
0065         } else {
0066             pdfImg.d->m_height = height * sourceAspectRatio / targetAspectRatio;
0067         }
0068     }
0069     pdfImg.d->m_transform = PopplerUtils::currentTransform(state);
0070     pdfImg.d->m_format = format;
0071 
0072     if (!ref) {
0073         pdfImg.d->load(str, colorMap);
0074     }
0075 
0076     m_images.push_back(pdfImg);
0077 }
0078 
0079 void PdfExtractorOutputDevice::drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, bool invert, bool interpolate, bool inlineImg)
0080 {
0081     Q_UNUSED(invert);
0082     Q_UNUSED(interpolate);
0083 
0084     if (!str && !inlineImg) {
0085         return;
0086     }
0087     addRasterImage(state, ref, str, width, height, nullptr, PdfImageType::Mask);
0088 }
0089 
0090 void PdfExtractorOutputDevice::drawImage(GfxState* state, Object* ref, Stream* str, int width, int height, GfxImageColorMap* colorMap, bool interpolate, PopplerMaskColors* maskColors, bool inlineImg)
0091 {
0092     Q_UNUSED(interpolate)
0093     Q_UNUSED(maskColors)
0094 
0095     if (!str && !inlineImg) {
0096         return;
0097     }
0098     addRasterImage(state, ref, str, width, height, colorMap, PdfImageType::Image);
0099 }
0100 
0101 void PdfExtractorOutputDevice::drawMaskedImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool interpolate, Stream *maskStr, int maskWidth, int maskHeight, bool maskInvert, bool maskInterpolate)
0102 {
0103     Q_UNUSED(interpolate)
0104     Q_UNUSED(maskInvert)
0105     Q_UNUSED(maskInterpolate)
0106 
0107     addRasterImage(state, ref, str, width, height, colorMap, PdfImageType::Image);
0108 
0109     if (ref) {
0110         const auto dict = str->getDict();
0111         const auto maskObj = dict->lookup("Mask");
0112         if (maskObj.isStream()) {
0113             addRasterImage(state, ref, maskStr, maskWidth, maskHeight, nullptr, PdfImageType::Mask);
0114         }
0115     }
0116 }
0117 
0118 void PdfExtractorOutputDevice::saveState(GfxState *state)
0119 {
0120     Q_UNUSED(state)
0121     m_vectorOps.push_back(VectorOp{VectorOp::PushState, {}, {}});
0122 }
0123 
0124 void PdfExtractorOutputDevice::restoreState(GfxState *state)
0125 {
0126     Q_UNUSED(state)
0127     if (m_vectorOps.empty()) {
0128         return;
0129     }
0130     const auto &lastOp = *(m_vectorOps.end() -1);
0131     if (lastOp.type == VectorOp::PushState) {
0132         m_vectorOps.resize(m_vectorOps.size() - 1);
0133     } else {
0134         m_vectorOps.push_back(VectorOp{VectorOp::PopState, {}, {}});
0135     }
0136 }
0137 
0138 static bool isRelevantStroke(const QPen &pen)
0139 {
0140     return !qFuzzyCompare(pen.widthF(), 0.0) && pen.color() == Qt::black;
0141 }
0142 
0143 static bool isRectangularPath(const QPainterPath &path)
0144 {
0145     qreal x = 0.0, y = 0.0;
0146     for (int i = 0; i < path.elementCount(); ++i) {
0147         const auto elem = path.elementAt(i);
0148         switch (elem.type) {
0149             case QPainterPath::MoveToElement:
0150                 x = elem.x;
0151                 y = elem.y;
0152                 break;
0153             case QPainterPath::LineToElement:
0154                 if (x != elem.x && y != elem.y) {
0155                     qDebug() << "path contains diagonal line, discarding";
0156                     return false;
0157                 }
0158                 x = elem.x;
0159                 y = elem.y;
0160                 break;
0161             case QPainterPath::CurveToElement:
0162             case QPainterPath::CurveToDataElement:
0163                 qDebug() << "path contains a curve, discarding";
0164                 return false;
0165         }
0166     }
0167 
0168     return true;
0169 }
0170 
0171 void PdfExtractorOutputDevice::stroke(GfxState *state)
0172 {
0173     const auto pen = PopplerUtils::currentPen(state);
0174     if (!isRelevantStroke(pen)) {
0175         return;
0176     }
0177 
0178     const auto path = PopplerUtils::convertPath(state->getPath(), Qt::WindingFill);
0179     if (!isRectangularPath(path)) {
0180         return;
0181     }
0182     const auto t = PopplerUtils::currentTransform(state);
0183     m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, pen, QBrush()}});
0184 }
0185 
0186 static bool isRelevantFill(const QBrush &brush)
0187 {
0188     return brush.color() == Qt::black;
0189 }
0190 
0191 void PdfExtractorOutputDevice::fill(GfxState *state)
0192 {
0193     const auto brush = PopplerUtils::currentBrush(state);
0194     if (!isRelevantFill(brush)) {
0195         return;
0196     }
0197 
0198     const auto path = PopplerUtils::convertPath(state->getPath(), Qt::WindingFill);
0199     const auto b = path.boundingRect();
0200     if (b.width() == 0  || b.height() == 0) {
0201         return;
0202     }
0203 
0204     const auto t = PopplerUtils::currentTransform(state);
0205     m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, QPen(), brush}});
0206 }
0207 
0208 void PdfExtractorOutputDevice::eoFill(GfxState *state)
0209 {
0210     const auto brush = PopplerUtils::currentBrush(state);
0211     if (!isRelevantFill(brush)) {
0212         return;
0213     }
0214 
0215     const auto path = PopplerUtils::convertPath(state->getPath(), Qt::OddEvenFill);
0216     const auto b = path.boundingRect();
0217     if (b.width() == 0  || b.height() == 0) {
0218         return;
0219     }
0220 
0221     const auto t = PopplerUtils::currentTransform(state);
0222     m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, QPen(), brush}});
0223 }
0224 
0225 void PdfExtractorOutputDevice::finalize()
0226 {
0227     // remove single state groups, then try to merge adjacents paths
0228     std::vector<VectorOp> mergedOps;
0229     mergedOps.reserve(m_vectorOps.size());
0230     for (auto it = m_vectorOps.begin(); it != m_vectorOps.end(); ++it) {
0231         if ((*it).type == VectorOp::PushState && std::distance(it, m_vectorOps.end()) >= 2 && (*(it + 1)).type == VectorOp::Path && (*(it + 2)).type == VectorOp::PopState) {
0232             ++it;
0233             mergedOps.push_back(*it);
0234             ++it;
0235         } else {
0236             mergedOps.push_back(*it);
0237         }
0238     }
0239     //qDebug() << m_vectorOps.size() << mergedOps.size();
0240 
0241     std::vector<PdfVectorPicture::PathStroke> strokes;
0242     QTransform t;
0243     for (const auto &op : mergedOps) {
0244         if (op.type == VectorOp::Path) {
0245             if (t.isIdentity()) {
0246                 t = op.transform;
0247             }
0248             if (t != op.transform) {
0249                 //qDebug() << "diffent transforms for strokes, not supported yet";
0250                 continue;
0251             }
0252             strokes.push_back(op.stroke);
0253         } else if (!strokes.empty()) {
0254             PdfVectorPicture pic;
0255             pic.setStrokes(std::move(strokes));
0256             pic.setTransform(t);
0257             addVectorImage(pic);
0258             t = QTransform();
0259         }
0260     }
0261     if (!strokes.empty()) {
0262         PdfVectorPicture pic;
0263         pic.setStrokes(std::move(strokes));
0264         pic.setTransform(t);
0265         addVectorImage(pic);
0266     }
0267 }
0268 
0269 void PdfExtractorOutputDevice::addVectorImage(const PdfVectorPicture &pic)
0270 {
0271     if (PdfBarcodeUtil::isPlausiblePath(pic.pathElementsCount(), BarcodeDecoder::Any) == BarcodeDecoder::None) {
0272         return;
0273     }
0274 
0275     PdfImage img;
0276     img.d->m_height = pic.height();
0277     img.d->m_width = pic.width();
0278     img.d->m_sourceHeight = pic.sourceHeight();
0279     img.d->m_sourceWidth = pic.sourceWidth();
0280     img.d->m_transform = pic.transform();
0281     img.d->m_vectorPicture = pic;
0282     m_images.push_back(img);
0283 }
0284 
0285 void PdfExtractorOutputDevice::processLink(AnnotLink *link)
0286 {
0287     TextOutputDev::processLink(link);
0288     if (!link->isOk() || !link->getAction() || link->getAction()->getKind() != actionURI) {
0289         return;
0290     }
0291 
0292     const auto uriLink = static_cast<LinkURI*>(link->getAction());
0293     double xd1, yd1, xd2, yd2;
0294     link->getRect(&xd1, &yd1, &xd2, &yd2);
0295 
0296     double xu1, yu1, xu2, yu2;
0297     cvtDevToUser(xd1, yd1, &xu1, &yu1);
0298     cvtDevToUser(xd2, yd2, &xu2, &yu2);
0299     PdfLink l(QString::fromStdString(uriLink->getURI()), QRectF(QPointF(std::min(xu1, xu2), std::min(yu1, yu2)), QPointF(std::max(xu1, xu2), std::max(yu1, yu2))));
0300     m_links.push_back(std::move(l));
0301 }