File indexing completed on 2024-05-05 05:50:41

0001 /*
0002     SPDX-FileCopyrightText: 2016 Ragnar Thomsen <rthomsen6@gmail.com>
0003 
0004     SPDX-License-Identifier: BSD-2-Clause
0005 */
0006 
0007 #include "mimetypes.h"
0008 #include "ark_debug.h"
0009 
0010 #include <QFileInfo>
0011 #include <QMimeDatabase>
0012 #include <QRegularExpression>
0013 #include <QStandardPaths>
0014 
0015 namespace Kerfuffle
0016 {
0017 QMimeType determineMimeType(const QString &filename, MimePreference mp)
0018 {
0019     QMimeDatabase db;
0020 
0021     QFileInfo fileinfo(filename);
0022     QString inputFile = filename;
0023 
0024     // #328815: since detection-by-content does not work for compressed tar archives (see below why)
0025     // we cannot rely on it when the archive extension is wrong; we need to validate by hand.
0026     if (fileinfo.completeSuffix().toLower().remove(QRegularExpression(QStringLiteral("[^a-z\\.]"))).contains(QLatin1String("tar."))) {
0027         inputFile.chop(fileinfo.completeSuffix().length());
0028         QString cleanExtension(fileinfo.completeSuffix().toLower());
0029 
0030         // tar.bz2 and tar.lz4 need special treatment since they contain numbers.
0031         bool isBZ2 = false;
0032         bool isLZ4 = false;
0033         if (fileinfo.completeSuffix().contains(QLatin1String("bz2"), Qt::CaseInsensitive)) {
0034             cleanExtension.remove(QStringLiteral("bz2"));
0035             isBZ2 = true;
0036         }
0037         if (fileinfo.completeSuffix().contains(QLatin1String("lz4"), Qt::CaseInsensitive)) {
0038             cleanExtension.remove(QStringLiteral("lz4"));
0039             isLZ4 = true;
0040         }
0041 
0042         // We remove non-alpha chars from the filename extension, but not periods.
0043         // If the filename is e.g. "foo.tar.gz.1", we get the "foo.tar.gz." string,
0044         // so we need to manually drop the last period character from it.
0045         cleanExtension.remove(QRegularExpression(QStringLiteral("[^a-z\\.]")));
0046         if (cleanExtension.endsWith(QLatin1Char('.'))) {
0047             cleanExtension.chop(1);
0048         }
0049 
0050         // Re-add extension for tar.bz2 and tar.lz4.
0051         if (isBZ2) {
0052             cleanExtension.append(QStringLiteral(".bz2"));
0053         }
0054         if (isLZ4) {
0055             cleanExtension.append(QStringLiteral(".lz4"));
0056         }
0057 
0058         inputFile += cleanExtension;
0059         qCDebug(ARK) << "Validated filename of compressed tar" << filename << "into filename" << inputFile;
0060     }
0061 
0062     QMimeType mimeFromExtension = db.mimeTypeForFile(inputFile, QMimeDatabase::MatchExtension);
0063     QMimeType mimeFromContent = db.mimeTypeForFile(filename, QMimeDatabase::MatchContent);
0064 
0065     // mimeFromContent will be "application/octet-stream" when file is
0066     // unreadable, so use extension.
0067     if (!fileinfo.isReadable()) {
0068         return mimeFromExtension;
0069     }
0070 
0071     // Compressed tar-archives are detected as single compressed files when
0072     // detecting by content. The following code fixes detection of tar.gz, tar.bz2, tar.xz,
0073     // tar.lzo, tar.lz, tar.lrz and tar.zst.
0074     if ((mimeFromExtension.inherits(QStringLiteral("application/x-compressed-tar"))
0075          && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/gzip")))
0076         || (mimeFromExtension.inherits(QStringLiteral("application/x-bzip-compressed-tar"))
0077             && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-bzip")))
0078         || (mimeFromExtension.inherits(QStringLiteral("application/x-bzip2-compressed-tar"))
0079             && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-bzip2")))
0080         || (mimeFromExtension.inherits(QStringLiteral("application/x-xz-compressed-tar"))
0081             && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-xz")))
0082         || (mimeFromExtension.inherits(QStringLiteral("application/x-tarz")) && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-compress")))
0083         || (mimeFromExtension.inherits(QStringLiteral("application/x-tzo")) && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-lzop")))
0084         || (mimeFromExtension.inherits(QStringLiteral("application/x-lzip-compressed-tar"))
0085             && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-lzip")))
0086         || (mimeFromExtension.inherits(QStringLiteral("application/x-lrzip-compressed-tar"))
0087             && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-lrzip")))
0088         || (mimeFromExtension.inherits(QStringLiteral("application/x-lz4-compressed-tar"))
0089             && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-lz4")))
0090         || (mimeFromExtension.inherits(QStringLiteral("application/x-zstd-compressed-tar"))
0091             && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/zstd")))) {
0092         return mimeFromExtension;
0093     }
0094 
0095     if (mimeFromExtension != mimeFromContent) {
0096         if (mimeFromContent.isDefault()) {
0097             qCWarning(ARK) << "Could not detect mimetype from content."
0098                            << "Using extension-based mimetype:" << mimeFromExtension.name();
0099             return mimeFromExtension;
0100         }
0101 
0102         qCDebug(ARK) << "Mimetype for filename extension (" << mimeFromExtension.name() << ") did not match mimetype for content (" << mimeFromContent.name()
0103                      << "). Using content-based mimetype.";
0104     }
0105 
0106     return mp == PreferExtensionMime ? mimeFromExtension : mimeFromContent;
0107 }
0108 
0109 } // namespace Kerfuffle