File indexing completed on 2024-05-19 05:05:37

0001 /***************************************************************************
0002  *   SPDX-License-Identifier: GPL-2.0-or-later
0003  *                                                                         *
0004  *   SPDX-FileCopyrightText: 2004-2023 Thomas Fischer <fischer@unix-ag.uni-kl.de>
0005  *                                                                         *
0006  *   This program is free software; you can redistribute it and/or modify  *
0007  *   it under the terms of the GNU General Public License as published by  *
0008  *   the Free Software Foundation; either version 2 of the License, or     *
0009  *   (at your option) any later version.                                   *
0010  *                                                                         *
0011  *   This program is distributed in the hope that it will be useful,       *
0012  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0013  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0014  *   GNU General Public License for more details.                          *
0015  *                                                                         *
0016  *   You should have received a copy of the GNU General Public License     *
0017  *   along with this program; if not, see <https://www.gnu.org/licenses/>. *
0018  ***************************************************************************/
0019 
0020 #ifdef HAVE_POPPLERQT
0021 
0022 #include "fileimporterpdf.h"
0023 
0024 #include <QBuffer>
0025 #include <QFile>
0026 
0027 #ifdef HAVE_POPPLERQT5
0028 #include <poppler-qt5.h>
0029 #else // not HAVE_POPPLERQT5
0030 #ifdef HAVE_POPPLERQT6
0031 #include <poppler-qt6.h>
0032 #endif // HAVE_POPPLERQT6
0033 #endif // HAVE_POPPLERQT5
0034 
0035 #include <File>
0036 #include "fileimporterbibtex.h"
0037 #include "fileimporter_p.h"
0038 #include "logging_io.h"
0039 
0040 FileImporterPDF::FileImporterPDF(QObject *parent)
0041         : FileImporter(parent), m_cancelFlag(false)
0042 {
0043     m_bibTeXimporter = new FileImporterBibTeX(this);
0044     connect(m_bibTeXimporter, &FileImporterBibTeX::message, this, &FileImporterPDF::message);
0045 }
0046 
0047 FileImporterPDF::~FileImporterPDF()
0048 {
0049     delete m_bibTeXimporter;
0050 }
0051 
0052 File *FileImporterPDF::load(QIODevice *iodevice)
0053 {
0054     check_if_iodevice_invalid(iodevice);
0055 
0056     m_cancelFlag = false;
0057     File *result = nullptr;
0058     QByteArray buffer = iodevice->readAll();
0059 
0060 #ifdef HAVE_POPPLERQT5
0061     QScopedPointer<Poppler::Document> doc(Poppler::Document::loadFromData(buffer));
0062 #else // not HAVE_POPPLERQT5
0063 #ifdef HAVE_POPPLERQT6
0064     std::unique_ptr<Poppler::Document> doc = Poppler::Document::loadFromData(buffer);
0065 #endif // HAVE_POPPLERQT6
0066 #endif // HAVE_POPPLERQT5
0067     if (!doc) {
0068         qCWarning(LOG_KBIBTEX_IO) << "Could not load PDF document";
0069         iodevice->close();
0070         return nullptr;
0071     }
0072 
0073     /// Iterate through all files embedded in this PDF file (if any),
0074     /// check for file extension '.bib', and try to load bibliography
0075     /// data.
0076     if (doc->hasEmbeddedFiles()) {
0077         const QList<Poppler::EmbeddedFile *> embeddedFiles = doc->embeddedFiles();
0078         for (Poppler::EmbeddedFile *file : embeddedFiles) {
0079             if (file->name().endsWith(QStringLiteral(".bib"))) {
0080                 // TODO maybe request implementation of a constData() for
0081                 // Poppler::EmbeddedFile to operate on const objects?
0082                 QByteArray data(file->data());
0083                 QBuffer buffer(&data);
0084                 FileImporterBibTeX bibTeXimporter(this);
0085                 connect(&bibTeXimporter, &FileImporter::progress, this, &FileImporter::progress);
0086                 buffer.open(QIODevice::ReadOnly);
0087                 result = bibTeXimporter.load(&buffer);
0088                 buffer.close();
0089 
0090                 if (result) {
0091                     qCDebug(LOG_KBIBTEX_IO) << "Bibliography extracted from embedded file" << file->name() << "has" << result->count() << "entries";
0092                     if (result->count() > 0)
0093                         break; ///< stop processing after first valid, non-empty BibTeX file
0094                     else {
0095                         /// ... otherwise delete empty bibliography object
0096                         delete result;
0097                         result = nullptr;
0098                     }
0099                 } else
0100                     qCDebug(LOG_KBIBTEX_IO) << "Create bibliography file from embedded file" << file->name() << "failed";
0101             } else
0102                 qCDebug(LOG_KBIBTEX_IO) << "Embedded file" << file->name() << "doesn't have right extension ('.bib')";
0103         }
0104     } else
0105         qCDebug(LOG_KBIBTEX_IO) << "PDF document has no files embedded";
0106 
0107     iodevice->close();
0108     return result;
0109 }
0110 
0111 bool FileImporterPDF::guessCanDecode(const QString &)
0112 {
0113     return false;
0114 }
0115 
0116 void FileImporterPDF::cancel()
0117 {
0118     m_cancelFlag = true;
0119     m_bibTeXimporter->cancel();
0120 }
0121 
0122 #endif // HAVE_POPPLERQT