File indexing completed on 2024-12-15 05:02:05

0001 /*
0002     SPDX-FileCopyrightText: 2016 Ivan Čukić <ivan.cukic(at)kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
0005 */
0006 
0007 #include "RecollRunner.h"
0008 
0009 #include <QDebug>
0010 #include <QDir>
0011 #include <QMimeDatabase>
0012 #include <KPluginFactory>
0013 
0014 #include <KLocalizedString>
0015 
0016 #include "rclconfig.h"
0017 #include "rcldb.h"
0018 #include "rclinit.h"
0019 #include "rclquery.h"
0020 #include "wasatorcl.h"
0021 #include "docseqdb.h"
0022 
0023 #include <memory>
0024 
// Scaling factor for the relevance of a Recoll match: query() computes
// doc.pc / 100.0 * RECOLL_MAXIMUM_SCORE for every result it reports.
// A typed constant is used instead of a macro so the value obeys scoping
// and type checking.
constexpr double RECOLL_MAXIMUM_SCORE = .8;
0026 
0027 BLADE_EXPORT_PLUGIN(recollrunner, RecollRunner, "blade-plugin-recoll.json")
0028 
0029 class RecollRunner::Private {
0030 public:
0031     Private()
0032         : valid(false)
0033         , rclconfig(nullptr)
0034     {
0035     }
0036 
0037     bool valid;
0038     RclConfig *rclconfig;
0039     std::unique_ptr<Rcl::Db> rcldb;
0040 
0041 };
0042 
0043 RecollRunner::RecollRunner(QObject *parent, const QVariantList &args)
0044     : AbstractRunner(parent)
0045     , d(new Private())
0046 {
0047     std::string reason;
0048     d->rclconfig = recollinit(0, 0, reason);
0049 
0050     if (!d->rclconfig || !d->rclconfig->ok()) {
0051         qWarning() << "Recoll configuration problem: " << reason.data();
0052         return;
0053     }
0054 
0055     // if (d->rclconfig->getDbDir().empty()) {
0056     //     qWarning() << "No db directory in configuration!?";
0057     //     return;
0058     // }
0059 
0060     d->rcldb.reset(new Rcl::Db(d->rclconfig));
0061 
0062     if (!d->rcldb->open(Rcl::Db::DbRO)) {
0063         // qWarning() << "Failed to open the database: " << d->rclconfig->getDbDir().data();
0064         return;
0065     }
0066 
0067     d->valid = true;
0068 }
0069 
0070 RecollRunner::~RecollRunner()
0071 {
0072     delete d;
0073 }
0074 
0075 void RecollRunner::query()
0076 {
0077     qDebug() << queryString();
0078     emit startedProcessingQuery();
0079 
0080     auto queryString = this->queryString().toUtf8();
0081 
0082     if (d->valid && queryString.length() >= 3) {
0083 
0084         auto query = std::make_shared<Rcl::Query>(d->rcldb.get());
0085         query->setCollapseDuplicates(true);
0086 
0087         std::string reason;
0088         auto sdata = std::shared_ptr<Rcl::SearchData>(
0089             wasaStringToRcl(d->rclconfig, "english" /*stemlang*/,
0090                             (const char *)queryString, reason));
0091 
0092         if (!query->setQuery(sdata)) {
0093             qWarning() << "Wrong query";
0094         } else {
0095             DocSequenceDb seq(query, "Results", sdata);
0096 
0097             Rcl::Doc doc;
0098 
0099             ResultList results;
0100 
0101             for (int i = 0; i < seq.getResCnt(); ++i) {
0102                 // if (doc.pc < 50) break;
0103 
0104                 doc.erase();
0105                 seq.getDoc(i, doc);
0106                 qDebug() << "########\n" << doc.url.data() << " " << doc.pc << "%";
0107 
0108                 QString description;
0109 
0110 
0111                 Result result;
0112 
0113                 result.icon        = "text";
0114                 result.url         = QString::fromUtf8(doc.url.data());
0115                 result.title       = result.url.fileName();
0116                 result.relevance   = doc.pc / 100.0 * RECOLL_MAXIMUM_SCORE;
0117 
0118                 std::vector<std::string> vabs;
0119                 seq.getAbstract(doc, vabs);
0120 
0121                 if (vabs.size() > 0) {
0122                     result.description = QString::fromUtf8(vabs[0].data());
0123 
0124                     for (const auto &vab: vabs) {
0125                         result.matchedText += QString::fromUtf8(vab.data()) + " ";
0126                     }
0127                 }
0128 
0129                 results << result;
0130 
0131                 if (i % 5 == 0) {
0132                     emit reportNewResults(results);
0133                     results.clear();
0134                     if (i > 20) break;
0135                 }
0136             }
0137         }
0138     }
0139 
0140     emit finishedProcessingQuery();
0141 }
0142 
0143 #include "RecollRunner.moc"
0144