// File indexing completed on 2024-12-29 04:50:59
0001 /* 0002 SPDX-FileCopyrightText: 2018 Volker Krause <vkrause@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "config-kitinerary.h" 0008 #include "pdfdocument.h" 0009 #include "pdfdocument_p.h" 0010 #include "pdfextractoroutputdevice_p.h" 0011 #include "pdfimage_p.h" 0012 #include "popplerglobalparams_p.h" 0013 #include "popplerutils_p.h" 0014 #include "logging.h" 0015 0016 #include <QDebug> 0017 #include <QImage> 0018 #include <QScopedValueRollback> 0019 #include <QTimeZone> 0020 0021 #include <DateInfo.h> 0022 #include <PDFDoc.h> 0023 #include <PDFDocEncoding.h> 0024 #include <Stream.h> 0025 0026 #include <cmath> 0027 0028 using namespace KItinerary; 0029 0030 void PdfPagePrivate::load() 0031 { 0032 if (m_loaded) { 0033 return; 0034 } 0035 0036 PopplerGlobalParams gp; 0037 PdfExtractorOutputDevice device; 0038 m_doc->m_popplerDoc->displayPageSlice(&device, m_pageNum + 1, 72, 72, 0, false, true, false, -1, -1, -1, -1); 0039 m_doc->m_popplerDoc->processLinks(&device, m_pageNum + 1); 0040 device.finalize(); 0041 const auto pageRect = m_doc->m_popplerDoc->getPage(m_pageNum + 1)->getCropBox(); 0042 std::unique_ptr<GooString> s(device.getText(pageRect->x1, pageRect->y1, pageRect->x2, pageRect->y2)); 0043 0044 #if KPOPPLER_VERSION >= QT_VERSION_CHECK(0, 72, 0) 0045 m_text = QString::fromUtf8(s->c_str()); 0046 #else 0047 m_text = QString::fromUtf8(s->getCString()); 0048 #endif 0049 m_images = std::move(device.m_images); 0050 for (auto it = m_images.begin(); it != m_images.end(); ++it) { 0051 (*it).d->m_page = this; 0052 } 0053 0054 m_links = std::move(device.m_links); 0055 for (auto &link : m_links) { 0056 link.convertToPageRect(pageRect); 0057 } 0058 0059 m_loaded = true; 0060 } 0061 0062 PdfPage::PdfPage() 0063 : d(new PdfPagePrivate) 0064 { 0065 } 0066 0067 PdfPage::PdfPage(const PdfPage&) = default; 0068 PdfPage::~PdfPage() = default; 0069 PdfPage& PdfPage::operator=(const PdfPage&) = default; 0070 0071 QString 
PdfPage::text() const 0072 { 0073 d->load(); 0074 return d->m_text; 0075 } 0076 0077 static double ratio(double begin, double end, double ratio) 0078 { 0079 return begin + (end - begin) * ratio; 0080 } 0081 0082 QString PdfPage::textInRect(double left, double top, double right, double bottom) const 0083 { 0084 PopplerGlobalParams gp; 0085 0086 const auto page = d->m_doc->m_popplerDoc->getPage(d->m_pageNum + 1); 0087 const auto pageRect = page->getCropBox(); 0088 0089 double l; 0090 double t; 0091 double r; 0092 double b; 0093 switch (page->getRotate()) { 0094 case 0: 0095 l = ratio(pageRect->x1, pageRect->x2, left); 0096 t = ratio(pageRect->y1, pageRect->y2, top); 0097 r = ratio(pageRect->x1, pageRect->x2, right); 0098 b = ratio(pageRect->y1, pageRect->y2, bottom); 0099 break; 0100 case 90: 0101 l = ratio(pageRect->y1, pageRect->y2, left); 0102 t = ratio(pageRect->x1, pageRect->x2, top); 0103 r = ratio(pageRect->y1, pageRect->y2, right); 0104 b = ratio(pageRect->x1, pageRect->x2, bottom); 0105 break; 0106 default: 0107 qCWarning(Log) << "Unsupported page rotation!" 
<< page->getRotate(); 0108 return {}; 0109 } 0110 0111 TextOutputDev device(nullptr, false, 0, false, false); 0112 d->m_doc->m_popplerDoc->displayPageSlice(&device, d->m_pageNum + 1, 72, 72, 0, false, true, false, -1, -1, -1, -1); 0113 std::unique_ptr<GooString> s(device.getText(l, t, r, b)); 0114 #if KPOPPLER_VERSION >= QT_VERSION_CHECK(0, 72, 0) 0115 return QString::fromUtf8(s->c_str()); 0116 #else 0117 return QString::fromUtf8(s->getCString()); 0118 #endif 0119 } 0120 0121 int PdfPage::imageCount() const 0122 { 0123 d->load(); 0124 return d->m_images.size(); 0125 } 0126 0127 PdfImage PdfPage::image(int index) const 0128 { 0129 d->load(); 0130 return d->m_images[index]; 0131 } 0132 0133 QVariantList PdfPage::imagesVariant() const 0134 { 0135 d->load(); 0136 QVariantList l; 0137 l.reserve(imageCount()); 0138 std::for_each(d->m_images.begin(), d->m_images.end(), [&l](const PdfImage& img) { l.push_back(QVariant::fromValue(img)); }); 0139 return l; 0140 } 0141 0142 QVariantList PdfPage::imagesInRect(double left, double top, double right, double bottom) const 0143 { 0144 d->load(); 0145 QVariantList l; 0146 PopplerGlobalParams gp; 0147 const auto pageRect = d->m_doc->m_popplerDoc->getPage(d->m_pageNum + 1)->getCropBox(); 0148 0149 for (const auto &img : d->m_images) { 0150 if ((img.d->m_transform.dx() >= ratio(pageRect->x1, pageRect->x2, left) && img.d->m_transform.dx() <= ratio(pageRect->x1, pageRect->x2, right)) && 0151 (img.d->m_transform.dy() >= ratio(pageRect->y1, pageRect->y2, top) && img.d->m_transform.dy() <= ratio(pageRect->y1, pageRect->y2, bottom))) 0152 { 0153 l.push_back(QVariant::fromValue(img)); 0154 } 0155 } 0156 return l; 0157 } 0158 0159 int PdfPage::linkCount() const 0160 { 0161 d->load(); 0162 return d->m_links.size(); 0163 } 0164 0165 PdfLink PdfPage::link(int index) const 0166 { 0167 d->load(); 0168 return d->m_links[index]; 0169 } 0170 0171 QVariantList PdfPage::linksVariant() const 0172 { 0173 d->load(); 0174 QVariantList l; 0175 
l.reserve(d->m_links.size()); 0176 std::transform(d->m_links.begin(), d->m_links.end(), std::back_inserter(l), [](const PdfLink &link) { return QVariant::fromValue(link); }); 0177 return l; 0178 } 0179 0180 QVariantList PdfPage::linksInRect(double left, double top, double right, double bottom) const 0181 { 0182 QRectF bbox(QPointF(left, top), QPointF(right, bottom)); 0183 d->load(); 0184 0185 QVariantList l; 0186 for (const auto &link : d->m_links) { 0187 if (!link.area().intersects(bbox)) { 0188 continue; 0189 } 0190 l.push_back(QVariant::fromValue(link)); 0191 } 0192 0193 std::sort(l.begin(), l.end(), [](const auto &lhs, const auto &rhs) { 0194 const auto lhsLink = lhs.template value<PdfLink>(); 0195 const auto rhsLink = rhs.template value<PdfLink>(); 0196 if (lhsLink.area().top() == rhsLink.area().top()) { 0197 return lhsLink.area().left() < rhsLink.area().left(); 0198 } 0199 return lhsLink.area().top() < rhsLink.area().top(); 0200 }); 0201 0202 return l; 0203 } 0204 0205 static constexpr inline double pdfToMM(double points) 0206 { 0207 return points * 25.4 / 72.0; 0208 } 0209 0210 int PdfPage::width() const 0211 { 0212 const auto page = d->m_doc->m_popplerDoc->getPage(d->m_pageNum + 1); 0213 const auto rot = page->getRotate(); 0214 if (rot == 90 || rot == 270) { 0215 return pdfToMM(page->getCropHeight()); 0216 } 0217 return pdfToMM(page->getCropWidth()); 0218 } 0219 0220 int PdfPage::height() const 0221 { 0222 const auto page = d->m_doc->m_popplerDoc->getPage(d->m_pageNum + 1); 0223 const auto rot = page->getRotate(); 0224 if (rot == 90 || rot == 270) { 0225 return pdfToMM(page->getCropWidth()); 0226 } 0227 return pdfToMM(page->getCropHeight()); 0228 } 0229 0230 0231 PdfDocument::PdfDocument(QObject *parent) 0232 : QObject(parent) 0233 , d(new PdfDocumentPrivate) 0234 { 0235 } 0236 0237 PdfDocument::~PdfDocument() = default; 0238 0239 QString PdfDocument::text() const 0240 { 0241 QString text; 0242 std::for_each(d->m_pages.begin(), d->m_pages.end(), 
[&text](const PdfPage &p) { text += p.text(); }); 0243 return text; 0244 } 0245 0246 int PdfDocument::pageCount() const 0247 { 0248 return d->m_popplerDoc->getNumPages(); 0249 } 0250 0251 PdfPage PdfDocument::page(int index) const 0252 { 0253 return d->m_pages[index]; 0254 } 0255 0256 int PdfDocument::fileSize() const 0257 { 0258 return d->m_pdfData.size(); 0259 } 0260 0261 #if KPOPPLER_VERSION >= QT_VERSION_CHECK(21, 8, 0) 0262 static QDateTime parsePdfDateTime(const GooString *str) 0263 #else 0264 static QDateTime parsePdfDateTime(const char *str) 0265 #endif 0266 { 0267 int year; 0268 int month; 0269 int day; 0270 int hour; 0271 int min; 0272 int sec; 0273 int tzHours; 0274 int tzMins; 0275 char tz; 0276 0277 if (!parseDateString(str, &year, &month, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) { 0278 return {}; 0279 } 0280 0281 QDate date(year, month, day); 0282 QTime time(hour, min, sec); 0283 if (!date.isValid() || !time.isValid()) { 0284 return {}; 0285 } 0286 0287 int offset = tzHours * 3600 + tzMins * 60; 0288 if (tz == '+') { 0289 return QDateTime(date, time, QTimeZone::fromSecondsAheadOfUtc(offset)); 0290 } else if (tz == '-') { 0291 return QDateTime(date, time, QTimeZone::fromSecondsAheadOfUtc(-offset)); 0292 } 0293 return QDateTime(date, time, QTimeZone::UTC); 0294 } 0295 0296 QDateTime PdfDocument::creationTime() const 0297 { 0298 std::unique_ptr<GooString> dt(d->m_popplerDoc->getDocInfoCreatDate()); 0299 if (!dt) { 0300 return {}; 0301 } 0302 #if KPOPPLER_VERSION >= QT_VERSION_CHECK(21, 8, 0) 0303 return parsePdfDateTime(dt.get()); 0304 #elif KPOPPLER_VERSION >= QT_VERSION_CHECK(0, 72, 0) 0305 return parsePdfDateTime(dt->c_str()); 0306 #else 0307 return parsePdfDateTime(dt->getCString()); 0308 #endif 0309 } 0310 0311 QDateTime PdfDocument::modificationTime() const 0312 { 0313 std::unique_ptr<GooString> dt(d->m_popplerDoc->getDocInfoModDate()); 0314 if (!dt) { 0315 return {}; 0316 } 0317 #if KPOPPLER_VERSION >= QT_VERSION_CHECK(21, 8, 0) 0318 
return parsePdfDateTime(dt.get()); 0319 #elif KPOPPLER_VERSION >= QT_VERSION_CHECK(0, 72, 0) 0320 return parsePdfDateTime(dt->c_str()); 0321 #else 0322 return parsePdfDateTime(dt->getCString()); 0323 #endif 0324 } 0325 0326 0327 QString gooStringToUnicode(const std::unique_ptr<GooString> &s) 0328 { 0329 if (!s) { 0330 return {}; 0331 } 0332 0333 if (s->hasUnicodeMarker() || s->hasUnicodeMarkerLE()) { 0334 return QString::fromUtf16(reinterpret_cast<const char16_t*>(s->toStr().c_str()), s->toStr().size() / 2); 0335 } else { 0336 int len = 0; 0337 std::unique_ptr<const char[]> utf16Data(pdfDocEncodingToUTF16(s->toStr(), &len)); 0338 return QString::fromUtf16(reinterpret_cast<const char16_t*>(utf16Data.get()), len / 2); 0339 } 0340 0341 return QString::fromUtf8(s->c_str()); 0342 } 0343 0344 QString PdfDocument::title() const 0345 { 0346 return gooStringToUnicode(d->m_popplerDoc->getDocInfoTitle()); 0347 } 0348 0349 QString PdfDocument::producer() const 0350 { 0351 return gooStringToUnicode(d->m_popplerDoc->getDocInfoProducer()); 0352 } 0353 0354 QString PdfDocument::creator() const 0355 { 0356 return gooStringToUnicode(d->m_popplerDoc->getDocInfoCreator()); 0357 } 0358 0359 QString PdfDocument::author() const 0360 { 0361 return gooStringToUnicode(d->m_popplerDoc->getDocInfoAuthor()); 0362 } 0363 0364 QVariantList PdfDocument::pagesVariant() const 0365 { 0366 QVariantList l; 0367 l.reserve(pageCount()); 0368 std::for_each(d->m_pages.begin(), d->m_pages.end(), [&l](const PdfPage& p) { l.push_back(QVariant::fromValue(p)); }); 0369 return l; 0370 } 0371 0372 PdfDocument* PdfDocument::fromData(const QByteArray &data, QObject *parent) 0373 { 0374 PopplerGlobalParams gp; 0375 0376 std::unique_ptr<PdfDocument> doc(new PdfDocument(parent)); 0377 doc->d->m_pdfData = data; 0378 // PDFDoc takes ownership of stream 0379 #if KPOPPLER_VERSION >= QT_VERSION_CHECK(0, 58, 0) 0380 auto stream = new MemStream(const_cast<char*>(doc->d->m_pdfData.constData()), 0, doc->d->m_pdfData.size(), 
Object()); 0381 #else 0382 Object obj; 0383 obj.initNull(); 0384 auto stream = new MemStream(const_cast<char*>(doc->d->m_pdfData.constData()), 0, doc->d->m_pdfData.size(), &obj); 0385 #endif 0386 std::unique_ptr<PDFDoc> popplerDoc(new PDFDoc(stream)); 0387 if (!popplerDoc->isOk()) { 0388 qCWarning(Log) << "Got invalid PDF document!" << popplerDoc->getErrorCode(); 0389 return nullptr; 0390 } 0391 0392 doc->d->m_pages.reserve(popplerDoc->getNumPages()); 0393 for (int i = 0; i < popplerDoc->getNumPages(); ++i) { 0394 PdfPage page; 0395 page.d->m_pageNum = i; 0396 page.d->m_doc = doc->d.get(); 0397 doc->d->m_pages.push_back(page); 0398 } 0399 0400 doc->d->m_popplerDoc = std::move(popplerDoc); 0401 return doc.release(); 0402 } 0403 0404 bool PdfDocument::maybePdf(const QByteArray &data) 0405 { 0406 return data.startsWith("%PDF"); 0407 } 0408 0409 #include "moc_pdfdocument.cpp"