File indexing completed on 2024-04-28 15:39:55
0001 /* SPDX-FileCopyrightText: 2010-2018 Jesper Pedersen <blackie@blackie.dk> and 0002 Robert Krawitz <rlk@alum.mit.edu> 0003 0004 SPDX-License-Identifier: GPL-2.0-or-later 0005 */ 0006 0007 #include "FastDir.h" 0008 0009 #include <kpabase/Logging.h> 0010 0011 #include <QFile> 0012 #include <QLoggingCategory> 0013 #include <QMap> 0014 0015 extern "C" { 0016 #include <dirent.h> 0017 #include <stdio.h> 0018 #include <sys/types.h> 0019 0020 /* 0021 * Ideally the order of entries returned by readdir() should be close 0022 * to optimal; intuitively, it should reflect the order in which inodes 0023 * are returned from getdents() or equivalent, which should be the order 0024 * in which they're stored on the disk. Experimentally, that isn't always 0025 * true. One test, involving 10839 files totaling 90 GB resulted in 0026 * readdir() returning files in random order, where "find" returned them 0027 * sorted (and the same version of find does *not* in general return files 0028 * in alphabetical order). 0029 * 0030 * By repeated measurement, loading the files in the order returned by 0031 * readdir took about 16:30, where loading them in alphanumeric sorted order 0032 * took about 15:00. Running a similar test outside of kpa (using the order 0033 * returned by readdir() vs. sorted to cat the files through dd and measuring 0034 * the time) yielded if anything an even greater discrepancy (17:35 vs. 14:10). 0035 * 0036 * This issue is filesystem dependent, but is known to affect the extN 0037 * filesystems commonly used on Linux that use a hashed tree structure to 0038 * store directories. See e. g. 0039 * http://home.ifi.uio.no/paalh/publications/files/ipccc09.pdf and its 0040 * accompanying presentation 0041 * http://www.linux-kongress.org/2009/slides/linux_disk_io_performance_havard_espeland.pdf 0042 * 0043 * We could do even better by sorting by block position, but that would 0044 * greatly increase complexity. 0045 */ 0046 #ifdef __linux__ 0047 #include <linux/magic.h> 0048 #include <sys/vfs.h> 0049 #define HAVE_STATFS 0050 #define STATFS_FSTYPE_EXT2 EXT2_SUPER_MAGIC // Includes EXT3_SUPER_MAGIC, EXT4_SUPER_MAGIC 0051 #else 0052 #ifdef __FreeBSD__ 0053 #include <sys/disklabel.h> 0054 #include <sys/mount.h> 0055 #include <sys/param.h> 0056 #define HAVE_STATFS 0057 #define STATFS_FSTYPE_EXT2 FS_EXT2FS 0058 #endif 0059 // other platforms fall back to known-safe (but slower) implementation 0060 #endif // __linux__ 0061 } 0062 0063 typedef QMap<ino_t, QString> InodeMap; 0064 0065 typedef QSet<QString> StringSet; 0066 0067 DB::FastDir::FastDir(const QString &path) 0068 : m_path(path) 0069 { 0070 InodeMap tmpAnswer; 0071 DIR *dir; 0072 dirent *file; 0073 QByteArray bPath(QFile::encodeName(path)); 0074 dir = opendir(bPath.constData()); 0075 if (!dir) 0076 return; 0077 const bool doSortByInode = sortByInode(bPath); 0078 const bool doSortByName = sortByName(bPath); 0079 0080 #if defined(QT_THREAD_SUPPORT) && defined(_POSIX_THREAD_SAFE_FUNCTIONS) && !defined(Q_OS_CYGWIN) 0081 // ZaJ (2016-03-23): while porting to Qt5/KF5, this code-path is disabled on my system 0082 // I don't want to touch this right now since I can't verify correctness in any way. 0083 // rlk 2018-05-20: readdir_r is deprecated as of glibc 2.24; see 0084 // http://man7.org/linux/man-pages/man3/readdir_r.3.html. 0085 // There are problems with MAXNAMLEN/NAME_MAX and friends, that 0086 // can differ from filesystem to filesystem. It's also expected 0087 // that POSIX will (if it hasn't already) deprecate readdir_r 0088 // and require readdir to be thread safe. 0089 union dirent_buf { 0090 struct KDE_struct_dirent mt_file; 0091 char b[sizeof(struct dirent) + MAXNAMLEN + 1]; 0092 } *u = new union dirent_buf; 0093 while (readdir_r(dir, &(u->mt_file), &file) == 0 && file) 0094 #else 0095 // FIXME: use 64bit versions of readdir and dirent? 0096 while ((file = readdir(dir))) 0097 #endif // QT_THREAD_SUPPORT && _POSIX_THREAD_SAFE_FUNCTIONS 0098 { 0099 if (doSortByInode) 0100 tmpAnswer.insert(file->d_ino, QFile::decodeName(file->d_name)); 0101 else 0102 m_sortedList.append(QFile::decodeName(file->d_name)); 0103 } 0104 #if defined(QT_THREAD_SUPPORT) && defined(_POSIX_THREAD_SAFE_FUNCTIONS) && !defined(Q_OS_CYGWIN) 0105 delete u; 0106 #endif 0107 (void)closedir(dir); 0108 0109 if (doSortByInode) { 0110 for (InodeMap::iterator it = tmpAnswer.begin(); it != tmpAnswer.end(); ++it) { 0111 m_sortedList << it.value(); 0112 } 0113 } else if (doSortByName) { 0114 m_sortedList.sort(); 0115 } 0116 } 0117 0118 // No currently known filesystems where sort by name is optimal 0119 constexpr bool DB::sortByName(const QByteArray &) 0120 { 0121 return false; 0122 } 0123 0124 bool DB::sortByInode(const QByteArray &path) 0125 { 0126 #ifdef HAVE_STATFS 0127 struct statfs buf; 0128 if (statfs(path.constData(), &buf) == -1) 0129 return -1; 0130 // Add other filesystems as appropriate 0131 switch (buf.f_type) { 0132 case STATFS_FSTYPE_EXT2: 0133 return true; 0134 default: 0135 return false; 0136 } 0137 #else // HAVE_STATFS 0138 Q_UNUSED(path); 0139 return false; 0140 #endif // HAVE_STATFS 0141 } 0142 0143 const QStringList DB::FastDir::entryList() const 0144 { 0145 return m_sortedList; 0146 } 0147 0148 QStringList DB::FastDir::sortFileList(const StringSet &files) const 0149 { 0150 QStringList answer; 0151 StringSet tmp(files); 0152 for (const QString &fileName : m_sortedList) { 0153 if (tmp.contains(fileName)) { 0154 answer << fileName; 0155 tmp.remove(fileName); 0156 } else if (tmp.contains(m_path + fileName)) { 0157 answer << m_path + fileName; 0158 tmp.remove(m_path + fileName); 0159 } 0160 } 0161 if (tmp.count() > 0) { 0162 qCDebug(FastDirLog) << "Files left over after sorting on " << m_path; 0163 for (const QString &fileName : tmp) { 0164 qCDebug(FastDirLog) << fileName; 0165 answer << fileName; 0166 } 0167 } 0168 return answer; 0169 } 0170 0171 QStringList DB::FastDir::sortFileList(const QStringList &files) const 0172 { 0173 StringSet tmp; 0174 for (const QString &fileName : files) { 0175 tmp << fileName; 0176 } 0177 return sortFileList(tmp); 0178 } 0179 0180 // vi:expandtab:tabstop=4 shiftwidth=4: