File indexing completed on 2024-05-05 05:50:41
0001 /* 0002 SPDX-FileCopyrightText: 2016 Ragnar Thomsen <rthomsen6@gmail.com> 0003 0004 SPDX-License-Identifier: BSD-2-Clause 0005 */ 0006 0007 #include "mimetypes.h" 0008 #include "ark_debug.h" 0009 0010 #include <QFileInfo> 0011 #include <QMimeDatabase> 0012 #include <QRegularExpression> 0013 #include <QStandardPaths> 0014 0015 namespace Kerfuffle 0016 { 0017 QMimeType determineMimeType(const QString &filename, MimePreference mp) 0018 { 0019 QMimeDatabase db; 0020 0021 QFileInfo fileinfo(filename); 0022 QString inputFile = filename; 0023 0024 // #328815: since detection-by-content does not work for compressed tar archives (see below why) 0025 // we cannot rely on it when the archive extension is wrong; we need to validate by hand. 0026 if (fileinfo.completeSuffix().toLower().remove(QRegularExpression(QStringLiteral("[^a-z\\.]"))).contains(QLatin1String("tar."))) { 0027 inputFile.chop(fileinfo.completeSuffix().length()); 0028 QString cleanExtension(fileinfo.completeSuffix().toLower()); 0029 0030 // tar.bz2 and tar.lz4 need special treatment since they contain numbers. 0031 bool isBZ2 = false; 0032 bool isLZ4 = false; 0033 if (fileinfo.completeSuffix().contains(QLatin1String("bz2"), Qt::CaseInsensitive)) { 0034 cleanExtension.remove(QStringLiteral("bz2")); 0035 isBZ2 = true; 0036 } 0037 if (fileinfo.completeSuffix().contains(QLatin1String("lz4"), Qt::CaseInsensitive)) { 0038 cleanExtension.remove(QStringLiteral("lz4")); 0039 isLZ4 = true; 0040 } 0041 0042 // We remove non-alpha chars from the filename extension, but not periods. 0043 // If the filename is e.g. "foo.tar.gz.1", we get the "foo.tar.gz." string, 0044 // so we need to manually drop the last period character from it. 0045 cleanExtension.remove(QRegularExpression(QStringLiteral("[^a-z\\.]"))); 0046 if (cleanExtension.endsWith(QLatin1Char('.'))) { 0047 cleanExtension.chop(1); 0048 } 0049 0050 // Re-add extension for tar.bz2 and tar.lz4. 0051 if (isBZ2) { 0052 cleanExtension.append(QStringLiteral(".bz2")); 0053 } 0054 if (isLZ4) { 0055 cleanExtension.append(QStringLiteral(".lz4")); 0056 } 0057 0058 inputFile += cleanExtension; 0059 qCDebug(ARK) << "Validated filename of compressed tar" << filename << "into filename" << inputFile; 0060 } 0061 0062 QMimeType mimeFromExtension = db.mimeTypeForFile(inputFile, QMimeDatabase::MatchExtension); 0063 QMimeType mimeFromContent = db.mimeTypeForFile(filename, QMimeDatabase::MatchContent); 0064 0065 // mimeFromContent will be "application/octet-stream" when file is 0066 // unreadable, so use extension. 0067 if (!fileinfo.isReadable()) { 0068 return mimeFromExtension; 0069 } 0070 0071 // Compressed tar-archives are detected as single compressed files when 0072 // detecting by content. The following code fixes detection of tar.gz, tar.bz2, tar.xz, 0073 // tar.lzo, tar.lz, tar.lrz and tar.zst. 0074 if ((mimeFromExtension.inherits(QStringLiteral("application/x-compressed-tar")) 0075 && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/gzip"))) 0076 || (mimeFromExtension.inherits(QStringLiteral("application/x-bzip-compressed-tar")) 0077 && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-bzip"))) 0078 || (mimeFromExtension.inherits(QStringLiteral("application/x-bzip2-compressed-tar")) 0079 && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-bzip2"))) 0080 || (mimeFromExtension.inherits(QStringLiteral("application/x-xz-compressed-tar")) 0081 && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-xz"))) 0082 || (mimeFromExtension.inherits(QStringLiteral("application/x-tarz")) && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-compress"))) 0083 || (mimeFromExtension.inherits(QStringLiteral("application/x-tzo")) && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-lzop"))) 0084 || (mimeFromExtension.inherits(QStringLiteral("application/x-lzip-compressed-tar")) 0085 && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-lzip"))) 0086 || (mimeFromExtension.inherits(QStringLiteral("application/x-lrzip-compressed-tar")) 0087 && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-lrzip"))) 0088 || (mimeFromExtension.inherits(QStringLiteral("application/x-lz4-compressed-tar")) 0089 && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/x-lz4"))) 0090 || (mimeFromExtension.inherits(QStringLiteral("application/x-zstd-compressed-tar")) 0091 && mimeFromContent == db.mimeTypeForName(QStringLiteral("application/zstd")))) { 0092 return mimeFromExtension; 0093 } 0094 0095 if (mimeFromExtension != mimeFromContent) { 0096 if (mimeFromContent.isDefault()) { 0097 qCWarning(ARK) << "Could not detect mimetype from content." 0098 << "Using extension-based mimetype:" << mimeFromExtension.name(); 0099 return mimeFromExtension; 0100 } 0101 0102 qCDebug(ARK) << "Mimetype for filename extension (" << mimeFromExtension.name() << ") did not match mimetype for content (" << mimeFromContent.name() 0103 << "). Using content-based mimetype."; 0104 } 0105 0106 return mp == PreferExtensionMime ? mimeFromExtension : mimeFromContent; 0107 } 0108 0109 } // namespace Kerfuffle