// File indexing completed on 2024-12-29 04:50:59
0001 /* 0002 SPDX-FileCopyrightText: 2019 Volker Krause <vkrause@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "pdfextractoroutputdevice_p.h" 0008 #include "pdfbarcodeutil_p.h" 0009 #include "pdfimage.h" 0010 #include "pdfimage_p.h" 0011 #include "popplerutils_p.h" 0012 0013 #include <Annot.h> 0014 #include <Link.h> 0015 #include <Page.h> 0016 0017 #include <QDebug> 0018 0019 using namespace KItinerary; 0020 0021 PdfExtractorOutputDevice::PdfExtractorOutputDevice() 0022 : TextOutputDev(nullptr, false, 0, false, false) 0023 { 0024 } 0025 0026 void PdfExtractorOutputDevice::addRasterImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, PdfImageType type) 0027 { 0028 if ((!colorMap && type == PdfImageType::Image) || (colorMap && !colorMap->isOk()) || (ref && !ref->isRef()) || (!ref && !str)) { 0029 return; 0030 } 0031 0032 QImage::Format format; 0033 if (!colorMap && type != PdfImageType::Image) { 0034 format = QImage::Format_Mono; 0035 } else if (colorMap->getColorSpace()->getMode() == csIndexed) { 0036 format = QImage::Format_RGB888; 0037 } else if (colorMap->getNumPixelComps() == 1 && (colorMap->getBits() >= 1 && colorMap->getBits() <= 8)) { 0038 format = QImage::Format_Grayscale8; 0039 } else if (colorMap->getNumPixelComps() == 3 && colorMap->getBits() == 8) { 0040 format = QImage::Format_RGB888; 0041 } else { 0042 return; 0043 } 0044 0045 PdfImage pdfImg; 0046 if (ref) { 0047 pdfImg.d->m_ref = PdfImageRef(ref->getRef().num, ref->getRef().gen, type); 0048 } 0049 0050 #if KPOPPLER_VERSION >= QT_VERSION_CHECK(0, 69, 0) 0051 if (colorMap) { 0052 pdfImg.d->m_colorMap.reset(colorMap->copy()); 0053 } 0054 #endif 0055 pdfImg.d->m_sourceHeight = height; 0056 pdfImg.d->m_sourceWidth = width; 0057 pdfImg.d->m_width = width; 0058 pdfImg.d->m_height = height; 0059 // deal with aspect-ratio changing scaling 0060 const auto sourceAspectRatio = (double)width / (double)height; 0061 const auto 
targetAspectRatio = std::abs(state->getCTM()[0] / -state->getCTM()[3]); 0062 if (!qFuzzyCompare(sourceAspectRatio, targetAspectRatio) && qFuzzyIsNull(state->getCTM()[1]) && qFuzzyIsNull(state->getCTM()[2])) { 0063 if (targetAspectRatio > sourceAspectRatio) { 0064 pdfImg.d->m_width = width * targetAspectRatio / sourceAspectRatio; 0065 } else { 0066 pdfImg.d->m_height = height * sourceAspectRatio / targetAspectRatio; 0067 } 0068 } 0069 pdfImg.d->m_transform = PopplerUtils::currentTransform(state); 0070 pdfImg.d->m_format = format; 0071 0072 if (!ref) { 0073 pdfImg.d->load(str, colorMap); 0074 } 0075 0076 m_images.push_back(pdfImg); 0077 } 0078 0079 void PdfExtractorOutputDevice::drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, bool invert, bool interpolate, bool inlineImg) 0080 { 0081 Q_UNUSED(invert); 0082 Q_UNUSED(interpolate); 0083 0084 if (!str && !inlineImg) { 0085 return; 0086 } 0087 addRasterImage(state, ref, str, width, height, nullptr, PdfImageType::Mask); 0088 } 0089 0090 void PdfExtractorOutputDevice::drawImage(GfxState* state, Object* ref, Stream* str, int width, int height, GfxImageColorMap* colorMap, bool interpolate, PopplerMaskColors* maskColors, bool inlineImg) 0091 { 0092 Q_UNUSED(interpolate) 0093 Q_UNUSED(maskColors) 0094 0095 if (!str && !inlineImg) { 0096 return; 0097 } 0098 addRasterImage(state, ref, str, width, height, colorMap, PdfImageType::Image); 0099 } 0100 0101 void PdfExtractorOutputDevice::drawMaskedImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool interpolate, Stream *maskStr, int maskWidth, int maskHeight, bool maskInvert, bool maskInterpolate) 0102 { 0103 Q_UNUSED(interpolate) 0104 Q_UNUSED(maskInvert) 0105 Q_UNUSED(maskInterpolate) 0106 0107 addRasterImage(state, ref, str, width, height, colorMap, PdfImageType::Image); 0108 0109 if (ref) { 0110 const auto dict = str->getDict(); 0111 const auto maskObj = dict->lookup("Mask"); 0112 if 
(maskObj.isStream()) { 0113 addRasterImage(state, ref, maskStr, maskWidth, maskHeight, nullptr, PdfImageType::Mask); 0114 } 0115 } 0116 } 0117 0118 void PdfExtractorOutputDevice::saveState(GfxState *state) 0119 { 0120 Q_UNUSED(state) 0121 m_vectorOps.push_back(VectorOp{VectorOp::PushState, {}, {}}); 0122 } 0123 0124 void PdfExtractorOutputDevice::restoreState(GfxState *state) 0125 { 0126 Q_UNUSED(state) 0127 if (m_vectorOps.empty()) { 0128 return; 0129 } 0130 const auto &lastOp = *(m_vectorOps.end() -1); 0131 if (lastOp.type == VectorOp::PushState) { 0132 m_vectorOps.resize(m_vectorOps.size() - 1); 0133 } else { 0134 m_vectorOps.push_back(VectorOp{VectorOp::PopState, {}, {}}); 0135 } 0136 } 0137 0138 static bool isRelevantStroke(const QPen &pen) 0139 { 0140 return !qFuzzyCompare(pen.widthF(), 0.0) && pen.color() == Qt::black; 0141 } 0142 0143 static bool isRectangularPath(const QPainterPath &path) 0144 { 0145 qreal x = 0.0, y = 0.0; 0146 for (int i = 0; i < path.elementCount(); ++i) { 0147 const auto elem = path.elementAt(i); 0148 switch (elem.type) { 0149 case QPainterPath::MoveToElement: 0150 x = elem.x; 0151 y = elem.y; 0152 break; 0153 case QPainterPath::LineToElement: 0154 if (x != elem.x && y != elem.y) { 0155 qDebug() << "path contains diagonal line, discarding"; 0156 return false; 0157 } 0158 x = elem.x; 0159 y = elem.y; 0160 break; 0161 case QPainterPath::CurveToElement: 0162 case QPainterPath::CurveToDataElement: 0163 qDebug() << "path contains a curve, discarding"; 0164 return false; 0165 } 0166 } 0167 0168 return true; 0169 } 0170 0171 void PdfExtractorOutputDevice::stroke(GfxState *state) 0172 { 0173 const auto pen = PopplerUtils::currentPen(state); 0174 if (!isRelevantStroke(pen)) { 0175 return; 0176 } 0177 0178 const auto path = PopplerUtils::convertPath(state->getPath(), Qt::WindingFill); 0179 if (!isRectangularPath(path)) { 0180 return; 0181 } 0182 const auto t = PopplerUtils::currentTransform(state); 0183 
m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, pen, QBrush()}}); 0184 } 0185 0186 static bool isRelevantFill(const QBrush &brush) 0187 { 0188 return brush.color() == Qt::black; 0189 } 0190 0191 void PdfExtractorOutputDevice::fill(GfxState *state) 0192 { 0193 const auto brush = PopplerUtils::currentBrush(state); 0194 if (!isRelevantFill(brush)) { 0195 return; 0196 } 0197 0198 const auto path = PopplerUtils::convertPath(state->getPath(), Qt::WindingFill); 0199 const auto b = path.boundingRect(); 0200 if (b.width() == 0 || b.height() == 0) { 0201 return; 0202 } 0203 0204 const auto t = PopplerUtils::currentTransform(state); 0205 m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, QPen(), brush}}); 0206 } 0207 0208 void PdfExtractorOutputDevice::eoFill(GfxState *state) 0209 { 0210 const auto brush = PopplerUtils::currentBrush(state); 0211 if (!isRelevantFill(brush)) { 0212 return; 0213 } 0214 0215 const auto path = PopplerUtils::convertPath(state->getPath(), Qt::OddEvenFill); 0216 const auto b = path.boundingRect(); 0217 if (b.width() == 0 || b.height() == 0) { 0218 return; 0219 } 0220 0221 const auto t = PopplerUtils::currentTransform(state); 0222 m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, QPen(), brush}}); 0223 } 0224 0225 void PdfExtractorOutputDevice::finalize() 0226 { 0227 // remove single state groups, then try to merge adjacents paths 0228 std::vector<VectorOp> mergedOps; 0229 mergedOps.reserve(m_vectorOps.size()); 0230 for (auto it = m_vectorOps.begin(); it != m_vectorOps.end(); ++it) { 0231 if ((*it).type == VectorOp::PushState && std::distance(it, m_vectorOps.end()) >= 2 && (*(it + 1)).type == VectorOp::Path && (*(it + 2)).type == VectorOp::PopState) { 0232 ++it; 0233 mergedOps.push_back(*it); 0234 ++it; 0235 } else { 0236 mergedOps.push_back(*it); 0237 } 0238 } 0239 //qDebug() << m_vectorOps.size() << mergedOps.size(); 0240 0241 std::vector<PdfVectorPicture::PathStroke> strokes; 0242 QTransform t; 0243 for (const auto &op : 
mergedOps) { 0244 if (op.type == VectorOp::Path) { 0245 if (t.isIdentity()) { 0246 t = op.transform; 0247 } 0248 if (t != op.transform) { 0249 //qDebug() << "diffent transforms for strokes, not supported yet"; 0250 continue; 0251 } 0252 strokes.push_back(op.stroke); 0253 } else if (!strokes.empty()) { 0254 PdfVectorPicture pic; 0255 pic.setStrokes(std::move(strokes)); 0256 pic.setTransform(t); 0257 addVectorImage(pic); 0258 t = QTransform(); 0259 } 0260 } 0261 if (!strokes.empty()) { 0262 PdfVectorPicture pic; 0263 pic.setStrokes(std::move(strokes)); 0264 pic.setTransform(t); 0265 addVectorImage(pic); 0266 } 0267 } 0268 0269 void PdfExtractorOutputDevice::addVectorImage(const PdfVectorPicture &pic) 0270 { 0271 if (PdfBarcodeUtil::isPlausiblePath(pic.pathElementsCount(), BarcodeDecoder::Any) == BarcodeDecoder::None) { 0272 return; 0273 } 0274 0275 PdfImage img; 0276 img.d->m_height = pic.height(); 0277 img.d->m_width = pic.width(); 0278 img.d->m_sourceHeight = pic.sourceHeight(); 0279 img.d->m_sourceWidth = pic.sourceWidth(); 0280 img.d->m_transform = pic.transform(); 0281 img.d->m_vectorPicture = pic; 0282 m_images.push_back(img); 0283 } 0284 0285 void PdfExtractorOutputDevice::processLink(AnnotLink *link) 0286 { 0287 TextOutputDev::processLink(link); 0288 if (!link->isOk() || !link->getAction() || link->getAction()->getKind() != actionURI) { 0289 return; 0290 } 0291 0292 const auto uriLink = static_cast<LinkURI*>(link->getAction()); 0293 double xd1, yd1, xd2, yd2; 0294 link->getRect(&xd1, &yd1, &xd2, &yd2); 0295 0296 double xu1, yu1, xu2, yu2; 0297 cvtDevToUser(xd1, yd1, &xu1, &yu1); 0298 cvtDevToUser(xd2, yd2, &xu2, &yu2); 0299 PdfLink l(QString::fromStdString(uriLink->getURI()), QRectF(QPointF(std::min(xu1, xu2), std::min(yu1, yu2)), QPointF(std::max(xu1, xu2), std::max(yu1, yu2)))); 0300 m_links.push_back(std::move(l)); 0301 }