File indexing completed on 2024-04-28 15:17:24

0001 /*
0002     This file is part of the KDE Baloo Project
0003     SPDX-FileCopyrightText: 2015 Vishesh Handa <vhanda@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
0006 */
0007 
0008 #include <QCommandLineParser>
0009 #include <QCoreApplication>
0010 #include <QDebug>
0011 #include <QDir>
0012 #include <QDirIterator>
0013 #include <QElapsedTimer>
0014 #include <QMimeDatabase>
0015 #include <QTemporaryDir>
0016 
0017 #include "../../tests/file/util.h"
0018 #include "database.h"
0019 #include "databasesize.h"
0020 #include "src/file/basicindexingjob.h"
0021 #include "transaction.h"
0022 
0023 using namespace Baloo;
0024 
0025 int main(int argc, char** argv)
0026 {
0027     QCoreApplication app(argc, argv);
0028 
0029     QTemporaryDir tempDir;
0030     qDebug() << "Creating temporary DB in" << tempDir.path();
0031 
0032     Database db(tempDir.path());
0033     db.open(Baloo::Database::CreateDatabase);
0034 
0035     Transaction tr(db, Transaction::ReadWrite);
0036 
0037     QCommandLineParser parser;
0038     parser.addHelpOption();
0039     parser.addOption({QStringLiteral("s"), QStringLiteral("Maximum transaction size (number of documents)"), QStringLiteral("size"), QStringLiteral("50000")});
0040     parser.addOption({QStringLiteral("k"), QStringLiteral("Keep temporary DB (for analysis)")});
0041     parser.addPositionalArgument(QStringLiteral("dir"), QStringLiteral("Index root directory"));
0042     parser.process(app);
0043 
0044     const bool keepDb = parser.isSet(QStringLiteral("k"));
0045     tempDir.setAutoRemove(!keepDb);
0046 
0047     const uint transactionSize = parser.value(QStringLiteral("s")).toUInt();
0048     auto arguments = parser.positionalArguments();
0049     if (arguments.size() > 1 || !transactionSize) {
0050         parser.showHelp(1);
0051     }
0052     const QString path = [&arguments, &parser]() {
0053         if (arguments.empty()) {
0054             return QDir::homePath();
0055         } else {
0056             QFileInfo fi(arguments[0]);
0057             if (fi.isDir()) {
0058                 return fi.canonicalFilePath();
0059             }
0060             parser.showHelp(1);
0061         }
0062     }();
0063     qDebug() << "Indexing documents in" << path << "\nTransaction size:" << transactionSize;
0064 
0065     {
0066         QMimeDatabase mimeDb;
0067         QElapsedTimer timer;
0068         timer.start();
0069 
0070         QDirIterator it(path, QDir::NoDotAndDotDot | QDir::Files | QDir::Dirs, QDirIterator::Subdirectories);
0071         uint num = 0;
0072         while (it.hasNext()) {
0073             const QString& path = it.next();
0074             const QString& mimetype = mimeDb.mimeTypeForFile(path, QMimeDatabase::MatchExtension).name();
0075 
0076             BasicIndexingJob job(path, mimetype);
0077             if (!job.index()) {
0078                 continue;
0079             }
0080 
0081             if (tr.hasDocument(job.document().id())) {
0082                 qDebug() << "Skip" << path;
0083             } else {
0084                 tr.addDocument(job.document());
0085                 num++;
0086             }
0087 
0088             if ((num % transactionSize) == 0) {
0089                 tr.commit();
0090                 tr.reset(Transaction::ReadWrite);
0091                 qDebug() << num << "- Commit";
0092             }
0093         }
0094         tr.commit();
0095 
0096         qDebug() << "Done -" << timer.elapsed() << "msecs," << num << "documents";
0097     }
0098 
0099     {
0100         Transaction tr(db, Transaction::ReadOnly);
0101         const auto dbSize = tr.dbSize();
0102         qDebug() << "File size (MiB):" << dbSize.actualSize / (1024.0 * 1024)
0103                  << "Used:" << dbSize.expectedSize / (1024.0 * 1024);
0104     }
0105     printIOUsage();
0106 
0107     return 0;
0108 }