File indexing completed on 2024-05-19 05:05:37

0001 /***************************************************************************
0002  *   SPDX-License-Identifier: GPL-2.0-or-later
0003  *                                                                         *
0004  *   SPDX-FileCopyrightText: 2004-2022 Thomas Fischer <fischer@unix-ag.uni-kl.de>
0005  *                                                                         *
0006  *   This program is free software; you can redistribute it and/or modify  *
0007  *   it under the terms of the GNU General Public License as published by  *
0008  *   the Free Software Foundation; either version 2 of the License, or     *
0009  *   (at your option) any later version.                                   *
0010  *                                                                         *
0011  *   This program is distributed in the hope that it will be useful,       *
0012  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
0013  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
0014  *   GNU General Public License for more details.                          *
0015  *                                                                         *
0016  *   You should have received a copy of the GNU General Public License     *
0017  *   along with this program; if not, see <https://www.gnu.org/licenses/>. *
0018  ***************************************************************************/
0019 
0020 #ifndef KBIBTEX_IO_FILEINFO_H
0021 #define KBIBTEX_IO_FILEINFO_H
0022 
0023 #include <QSet>
0024 #include <QUrl>
0025 #include <QMimeType>
0026 #include <QSharedPointer>
0027 
0028 #ifdef HAVE_KF
0029 #include "kbibtexio_export.h"
0030 #endif // HAVE_KF
0031 
0032 class Entry;
0033 
0034 class KBIBTEXIO_EXPORT FileInfo
0035 {
0036 public:
0037     static const QString mimetypeOctetStream;
0038     static const QString mimetypeHTML;
0039     static const QString mimetypeBibTeX;
0040     static const QString mimetypeRIS;
0041     static const QString mimetypePDF;
0042 
0043     enum class TestExistence {
0044         Yes, ///< Test if file exists
0045         No ///< Skip test if file exists
0046     };
0047 
0048     /**
0049      * Finds a QMimeType with the given url.
0050      * Tries to guess a file's mime type by its extension first,
0051      * but falls back to QMimeType's mimeTypeForName if that does
0052      * not work. Background: If a HTTP or WebDAV server claims
0053      * that a .bib file is of mime type application/octet-stream,
0054      * QMimeType::mimeTypeForName will keep that assessment
0055      * instead of inspecting the file extension.
0056      *
0057      * @see QMimeType::mimeTypeForName
0058      * @param url Url to analyze
0059      * @return Guessed mime type
0060      */
0061     static QMimeType mimeTypeForUrl(const QUrl &url);
0062 
0063     /**
0064      * Find all file or URL references in the given text. Found filenames or
0065      * URLs are appended to the addTo list (duplicates are avoided).
0066      * Different test may get performed depending of the test for existence
0067      * of a potential file should be checked or not checked or if this matter
0068      * is undecided/irrelevant (recommended default case). For the test of
0069      * existence, baseDirectory is used to resolve relative paths.
0070      * @param text text to scan for filenames or URLs
0071      * @param testExistence shall be tested for file existence?
0072      * @param baseDirectory base directory for tests on relative path names
0073      * @param addTo add found URLs/filenames to this list
0074      */
0075     static void urlsInText(const QString &text, const TestExistence testExistence, const QString &baseDirectory, QSet<QUrl> &addTo);
0076 
0077     /**
0078      * Find all file or URL references in the given entry. Found filenames or
0079      * URLs are appended to the addTo list (duplicates are avoided).
0080      * Different test may get performed depending of the test for existence
0081      * of a potential file should be checked or not checked or if this matter
0082      * is undecided/irrelevant (recommended default case). For the test of
0083      * existence, bibTeXUrl is used to resolve relative paths.
0084      * @param entry entry to scan for filenames or URLs
0085      * @param bibTeXUrl base directory/URL for tests on relative path names
0086      * @param testExistence shall be tested for file existence?
0087      * @return list of found URLs/filenames (duplicates are avoided)
0088      */
0089     static QSet<QUrl> entryUrls(const QSharedPointer<const Entry> &entry, const QUrl &bibTeXUrl, TestExistence testExistence);
0090 
0091 #ifdef HAVE_POPPLERQT5
0092     /**
0093      * Load the given PDF file and return the contained plain text.
0094      * Makes use of Poppler to load and parse the file. All text
0095      * will be cached and loaded from cache if possible.
0096      * @param pdfFilename PDF file to load and extract text from
0097      * @return extracted plain text, either directly from PDF file or from cache OR QString() if there was an error
0098      */
0099     static QString pdfToText(const QString &pdfFilename);
0100 #endif // HAVE_POPPLERQT5
0101 
0102 protected:
0103     FileInfo();
0104 
0105 private:
0106 #ifdef HAVE_POPPLERQT5
0107     static void extractPDFTextToCache(const QString &pdfFilename, const QString &cacheFilename);
0108 #endif // HAVE_POPPLERQT5
0109 };
0110 
0111 #endif // KBIBTEX_IO_FILEINFO_H