File indexing completed on 2024-04-28 15:39:55

0001 /* SPDX-FileCopyrightText: 2010-2018 Jesper Pedersen <blackie@blackie.dk> and
0002    Robert Krawitz <rlk@alum.mit.edu>
0003 
0004    SPDX-License-Identifier: GPL-2.0-or-later
0005 */
0006 
0007 #include "FastDir.h"
0008 
0009 #include <kpabase/Logging.h>
0010 
0011 #include <QFile>
0012 #include <QLoggingCategory>
0013 #include <QMap>
0014 
0015 extern "C" {
0016 #include <dirent.h>
0017 #include <stdio.h>
0018 #include <sys/types.h>
0019 
0020 /*
0021  * Ideally the order of entries returned by readdir() should be close
0022  * to optimal; intuitively, it should reflect the order in which inodes
0023  * are returned from getdents() or equivalent, which should be the order
0024  * in which they're stored on the disk.  Experimentally, that isn't always
0025  * true.  One test, involving 10839 files totaling 90 GB resulted in
0026  * readdir() returning files in random order, where "find" returned them
0027  * sorted (and the same version of find does *not* in general return files
0028  * in alphabetical order).
0029  *
0030  * By repeated measurement, loading the files in the order returned by
0031  * readdir took about 16:30, where loading them in alphanumeric sorted order
0032  * took about 15:00.  Running a similar test outside of kpa (using the order
0033  * returned by readdir() vs. sorted to cat the files through dd and measuring
0034  * the time) yielded if anything an even greater discrepancy (17:35 vs. 14:10).
0035  *
0036  * This issue is filesystem dependent, but is known to affect the extN
0037  * filesystems commonly used on Linux that use a hashed tree structure to
0038  * store directories.  See e. g.
0039  * http://home.ifi.uio.no/paalh/publications/files/ipccc09.pdf and its
0040  * accompanying presentation
0041  * http://www.linux-kongress.org/2009/slides/linux_disk_io_performance_havard_espeland.pdf
0042  *
0043  * We could do even better by sorting by block position, but that would
0044  * greatly increase complexity.
0045  */
0046 #ifdef __linux__
0047 #include <linux/magic.h>
0048 #include <sys/vfs.h>
0049 #define HAVE_STATFS
0050 #define STATFS_FSTYPE_EXT2 EXT2_SUPER_MAGIC // Includes EXT3_SUPER_MAGIC, EXT4_SUPER_MAGIC
0051 #else
0052 #ifdef __FreeBSD__
0053 #include <sys/disklabel.h>
0054 #include <sys/mount.h>
0055 #include <sys/param.h>
0056 #define HAVE_STATFS
0057 #define STATFS_FSTYPE_EXT2 FS_EXT2FS
0058 #endif
0059 // other platforms fall back to known-safe (but slower) implementation
0060 #endif // __linux__
0061 }
0062 
0063 typedef QMap<ino_t, QString> InodeMap;
0064 
0065 typedef QSet<QString> StringSet;
0066 
0067 DB::FastDir::FastDir(const QString &path)
0068     : m_path(path)
0069 {
0070     InodeMap tmpAnswer;
0071     DIR *dir;
0072     dirent *file;
0073     QByteArray bPath(QFile::encodeName(path));
0074     dir = opendir(bPath.constData());
0075     if (!dir)
0076         return;
0077     const bool doSortByInode = sortByInode(bPath);
0078     const bool doSortByName = sortByName(bPath);
0079 
0080 #if defined(QT_THREAD_SUPPORT) && defined(_POSIX_THREAD_SAFE_FUNCTIONS) && !defined(Q_OS_CYGWIN)
0081     // ZaJ (2016-03-23): while porting to Qt5/KF5, this code-path is disabled on my system
0082     //     I don't want to touch this right now since I can't verify correctness in any way.
0083     // rlk 2018-05-20: readdir_r is deprecated as of glibc 2.24; see
0084     //     http://man7.org/linux/man-pages/man3/readdir_r.3.html.
0085     //     There are problems with MAXNAMLEN/NAME_MAX and friends, that
0086     //     can differ from filesystem to filesystem.  It's also expected
0087     //     that POSIX will (if it hasn't already) deprecate readdir_r
0088     //     and require readdir to be thread safe.
0089     union dirent_buf {
0090         struct KDE_struct_dirent mt_file;
0091         char b[sizeof(struct dirent) + MAXNAMLEN + 1];
0092     } *u = new union dirent_buf;
0093     while (readdir_r(dir, &(u->mt_file), &file) == 0 && file)
0094 #else
0095     // FIXME: use 64bit versions of readdir and dirent?
0096     while ((file = readdir(dir)))
0097 #endif // QT_THREAD_SUPPORT && _POSIX_THREAD_SAFE_FUNCTIONS
0098     {
0099         if (doSortByInode)
0100             tmpAnswer.insert(file->d_ino, QFile::decodeName(file->d_name));
0101         else
0102             m_sortedList.append(QFile::decodeName(file->d_name));
0103     }
0104 #if defined(QT_THREAD_SUPPORT) && defined(_POSIX_THREAD_SAFE_FUNCTIONS) && !defined(Q_OS_CYGWIN)
0105     delete u;
0106 #endif
0107     (void)closedir(dir);
0108 
0109     if (doSortByInode) {
0110         for (InodeMap::iterator it = tmpAnswer.begin(); it != tmpAnswer.end(); ++it) {
0111             m_sortedList << it.value();
0112         }
0113     } else if (doSortByName) {
0114         m_sortedList.sort();
0115     }
0116 }
0117 
0118 // No currently known filesystems where sort by name is optimal
0119 constexpr bool DB::sortByName(const QByteArray &)
0120 {
0121     return false;
0122 }
0123 
0124 bool DB::sortByInode(const QByteArray &path)
0125 {
0126 #ifdef HAVE_STATFS
0127     struct statfs buf;
0128     if (statfs(path.constData(), &buf) == -1)
0129         return -1;
0130     // Add other filesystems as appropriate
0131     switch (buf.f_type) {
0132     case STATFS_FSTYPE_EXT2:
0133         return true;
0134     default:
0135         return false;
0136     }
0137 #else // HAVE_STATFS
0138     Q_UNUSED(path);
0139     return false;
0140 #endif // HAVE_STATFS
0141 }
0142 
0143 const QStringList DB::FastDir::entryList() const
0144 {
0145     return m_sortedList;
0146 }
0147 
0148 QStringList DB::FastDir::sortFileList(const StringSet &files) const
0149 {
0150     QStringList answer;
0151     StringSet tmp(files);
0152     for (const QString &fileName : m_sortedList) {
0153         if (tmp.contains(fileName)) {
0154             answer << fileName;
0155             tmp.remove(fileName);
0156         } else if (tmp.contains(m_path + fileName)) {
0157             answer << m_path + fileName;
0158             tmp.remove(m_path + fileName);
0159         }
0160     }
0161     if (tmp.count() > 0) {
0162         qCDebug(FastDirLog) << "Files left over after sorting on " << m_path;
0163         for (const QString &fileName : tmp) {
0164             qCDebug(FastDirLog) << fileName;
0165             answer << fileName;
0166         }
0167     }
0168     return answer;
0169 }
0170 
0171 QStringList DB::FastDir::sortFileList(const QStringList &files) const
0172 {
0173     StringSet tmp;
0174     for (const QString &fileName : files) {
0175         tmp << fileName;
0176     }
0177     return sortFileList(tmp);
0178 }
0179 
0180 // vi:expandtab:tabstop=4 shiftwidth=4: