File indexing completed on 2025-03-16 12:49:36
0001 /* 0002 SPDX-FileCopyrightText: 2012 Vishesh Handa <me@vhanda.in> 0003 SPDX-FileCopyrightText: 2012 Jörg Ehrichs <joerg.ehrichs@gmx.de> 0004 0005 SPDX-License-Identifier: LGPL-2.1-or-later 0006 */ 0007 0008 0009 #include "popplerextractor.h" 0010 0011 #include <QScopedPointer> 0012 #include <QDebug> 0013 #include <QDateTime> 0014 0015 using namespace KFileMetaData; 0016 0017 PopplerExtractor::PopplerExtractor(QObject* parent) 0018 : ExtractorPlugin(parent) 0019 { 0020 0021 } 0022 0023 const QStringList supportedMimeTypes = { 0024 QStringLiteral("application/pdf"), 0025 }; 0026 0027 QStringList PopplerExtractor::mimetypes() const 0028 { 0029 return supportedMimeTypes; 0030 } 0031 0032 void PopplerExtractor::extract(ExtractionResult* result) 0033 { 0034 const QString fileUrl = result->inputUrl(); 0035 std::unique_ptr<Poppler::Document> pdfDoc(Poppler::Document::load(fileUrl, QByteArray(), QByteArray())); 0036 0037 if (!pdfDoc || pdfDoc->isLocked()) { 0038 return; 0039 } 0040 0041 result->addType(Type::Document); 0042 0043 if (result->inputFlags() & ExtractionResult::ExtractMetaData) { 0044 const QString title = pdfDoc->title(); 0045 if (!title.isEmpty()) { 0046 result->add(Property::Title, title); 0047 } 0048 0049 const QString subject = pdfDoc->subject(); 0050 if (!subject.isEmpty()) { 0051 result->add(Property::Subject, subject); 0052 } 0053 0054 const QString author = pdfDoc->author(); 0055 if (!author.isEmpty()) { 0056 result->add(Property::Author, author); 0057 } 0058 0059 const QString generator = pdfDoc->producer(); 0060 if (!generator.isEmpty()) { 0061 result->add(Property::Generator, generator); 0062 } 0063 0064 const QDateTime creationDate = pdfDoc->creationDate(); 0065 if (!creationDate.isNull()) { 0066 result->add(Property::CreationDate, creationDate); 0067 } 0068 0069 const int numPages = pdfDoc->numPages(); 0070 if (numPages > 0) { 0071 result->add(Property::PageCount, numPages); 0072 } 0073 } 0074 0075 if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) { 0076 return; 0077 } 0078 0079 for (int i = 0; i < pdfDoc->numPages(); i++) { 0080 std::unique_ptr<Poppler::Page> page(pdfDoc->page(i)); 0081 if (!page) { // broken pdf files do not return a valid page 0082 qWarning() << "Could not read page content from" << fileUrl; 0083 break; 0084 } 0085 result->append(page->text(QRectF())); 0086 } 0087 } 0088 0089 #include "moc_popplerextractor.cpp"