File indexing completed on 2025-01-05 03:54:13

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam project
0004  * https://www.digikam.org
0005  *
0006  * Date        : 2017-07-08
0007  * Description : Similarity database interface.
0008  *
0009  * SPDX-FileCopyrightText:      2009 by Marcel Wiesweg <marcel dot wiesweg at gmx dot de>
0010  * SPDX-FileCopyrightText: 2009-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0011  * SPDX-FileCopyrightText:      2017 by Swati Lodha    <swatilodha27 at gmail dot com>
0012  * SPDX-FileCopyrightText:      2018 by Mario Frank    <mario dot frank at uni minus potsdam dot de>
0013  *
0014  * SPDX-License-Identifier: GPL-2.0-or-later
0015  *
0016  * ============================================================ */
0017 
0018 #include "similaritydb.h"
0019 
0020 // Local includes
0021 
0022 #include "digikam_debug.h"
0023 
0024 namespace Digikam
0025 {
0026 
0027 class Q_DECL_HIDDEN SimilarityDb::Private
0028 {
0029 
0030 public:
0031 
0032     explicit Private() :
0033         db(nullptr)
0034     {
0035     }
0036 
0037     SimilarityDbBackend* db;
0038 };
0039 
0040 SimilarityDb::SimilarityDb(SimilarityDbBackend* const backend)
0041     : d(new Private)
0042 {
0043     d->db = backend;
0044 }
0045 
0046 SimilarityDb::~SimilarityDb()
0047 {
0048     delete d;
0049 }
0050 
0051 bool SimilarityDb::setSetting(const QString& keyword, const QString& value )
0052 {
0053     QMap<QString, QVariant> parameters;
0054     parameters.insert(QLatin1String(":keyword"), keyword);
0055     parameters.insert(QLatin1String(":value"), value);
0056 
0057     BdEngineBackend::QueryState queryStateResult =
0058             d->db->execDBAction(d->db->getDBAction(QString::fromUtf8("ReplaceSimilaritySetting")),
0059                                                    parameters);
0060 
0061     return (queryStateResult == BdEngineBackend::NoErrors);
0062 }
0063 
0064 QString SimilarityDb::getSetting(const QString& keyword)
0065 {
0066     QMap<QString, QVariant> parameters;
0067     parameters.insert(QLatin1String(":keyword"), keyword);
0068     QList<QVariant> values;
0069 
0070     // TODO Should really check return status here
0071 
0072     BdEngineBackend::QueryState queryStateResult =
0073             d->db->execDBAction(d->db->getDBAction(QString::fromUtf8("SelectSimilaritySetting")),
0074                                                    parameters, &values);
0075     qCDebug(DIGIKAM_SIMILARITYDB_LOG) << "SimilarityDb SelectSimilaritySetting val ret = "
0076                                       << (BdEngineBackend::QueryStateEnum)queryStateResult;
0077 
0078     if (values.isEmpty())
0079     {
0080         return QString();
0081     }
0082 
0083     return values.first().toString();
0084 }
0085 
0086 QString SimilarityDb::getLegacySetting(const QString& keyword)
0087 {
0088     QMap<QString, QVariant> parameters;
0089     parameters.insert(QLatin1String(":keyword"), keyword);
0090     QList<QVariant> values;
0091 
0092     // TODO Should really check return status here
0093 
0094     BdEngineBackend::QueryState queryStateResult =
0095             d->db->execDBAction(d->db->getDBAction(QString::fromUtf8("SelectSimilarityLegacySetting")),
0096                                                    parameters, &values);
0097     qCDebug(DIGIKAM_SIMILARITYDB_LOG) << "SimilarityDb SelectSimilaritySetting val ret = "
0098                                       << (BdEngineBackend::QueryStateEnum)queryStateResult;
0099 
0100     if (values.isEmpty())
0101     {
0102         return QString();
0103     }
0104 
0105     return values.first().toString();
0106 }
0107 
0108 // ----------- General methods for entry access ----------
0109 
0110 QSet<qlonglong> SimilarityDb::registeredImageIds() const
0111 {
0112     QSet<qlonglong> imageIds;
0113     QList<QVariant> values;
0114 
0115     // Get all image ids from the first and second imageid column of the ImageSimilarity table.
0116 
0117     d->db->execSql(QString::fromUtf8("SELECT imageid1, imageid2 FROM ImageSimilarity;"),
0118                    &values);
0119 
0120     for (QList<QVariant>::const_iterator it = values.constBegin() ; it != values.constEnd() ; )
0121     {
0122         imageIds << (*it).toLongLong();
0123         ++it;
0124         imageIds << (*it).toLongLong();
0125         ++it;
0126     }
0127 
0128     values.clear();
0129 
0130     // get all image ids from the ImageHaarMatrix table.
0131 
0132     d->db->execSql(QString::fromUtf8("SELECT imageid FROM ImageHaarMatrix;"),
0133                    &values);
0134 
0135     Q_FOREACH (const QVariant& var, values)
0136     {
0137         imageIds << var.toLongLong();
0138     }
0139 
0140     return imageIds;
0141 }
0142 
0143 // ----------- Methods for fingerprint (ImageHaarMatrix) table access ----------
0144 
0145 bool SimilarityDb::hasFingerprint(qlonglong imageId, FuzzyAlgorithm algorithm) const
0146 {
0147     if (algorithm == FuzzyAlgorithm::Haar)
0148     {
0149         QList<QVariant> values;
0150 
0151         d->db->execSql(QString::fromUtf8("SELECT imageid FROM ImageHaarMatrix "
0152                                          "WHERE matrix IS NOT NULL AND imageid=? LIMIT 1;"),
0153                        imageId, &values);
0154 
0155         // return true if there is at least one fingerprint
0156 
0157         return !values.isEmpty();
0158     }
0159 
0160     return false;
0161 }
0162 
0163 
0164 bool SimilarityDb::hasFingerprints()
0165 {
0166     return hasFingerprints(FuzzyAlgorithm::Haar);
0167 }
0168 
0169 bool SimilarityDb::hasFingerprints(FuzzyAlgorithm algorithm) const
0170 {
0171     if (algorithm == FuzzyAlgorithm::Haar)
0172     {
0173         QList<QVariant> values;
0174 
0175         d->db->execSql(QString::fromUtf8("SELECT imageid FROM ImageHaarMatrix "
0176                                          "WHERE matrix IS NOT NULL LIMIT 1;"),
0177                        &values);
0178 
0179         // return true if there is at least one fingerprint
0180 
0181         return !values.isEmpty();
0182     }
0183 
0184     return false;
0185 }
0186 
0187 bool SimilarityDb::hasDirtyOrMissingFingerprint(const ItemInfo& imageInfo, FuzzyAlgorithm algorithm) const
0188 {
0189     if (algorithm == FuzzyAlgorithm::Haar)
0190     {
0191         QList<QVariant> values;
0192 
0193         d->db->execSql(QString::fromUtf8("SELECT modificationDate, uniqueHash FROM ImageHaarMatrix "
0194                                          "WHERE imageid=?;"),
0195                        imageInfo.id(), &values);
0196 
0197         if (values.isEmpty())
0198         {
0199             // The image id does not exist -> missing fingerprint
0200 
0201             return true;
0202         }
0203         else
0204         {
0205             // The image id exists -> if uniqueHash or modificationDate differ, we need a new fingerprint.
0206 
0207             if (values.size() == 2)
0208             {
0209                 QDateTime modDateTime = values.at(0).toDateTime();
0210                 modDateTime.setTimeSpec(Qt::UTC);
0211 
0212                 if ((modDateTime             != imageInfo.modDateTime()) ||
0213                     (values.at(1).toString() != imageInfo.uniqueHash()))
0214                 {
0215                     return true;
0216                 }
0217             }
0218         }
0219     }
0220 
0221     return false;
0222 }
0223 
0224 QList<qlonglong> SimilarityDb::getDirtyOrMissingFingerprints(const QList<ItemInfo>& imageInfos,
0225                                                              FuzzyAlgorithm algorithm)
0226 {
0227     QList<qlonglong> itemIDs;
0228 
0229     if (algorithm == FuzzyAlgorithm::Haar)
0230     {
0231         Q_FOREACH (const ItemInfo& info, imageInfos)
0232         {
0233             QList<QVariant> values;
0234 
0235             d->db->execSql(QString::fromUtf8("SELECT modificationDate, uniqueHash FROM ImageHaarMatrix "
0236                                              "WHERE imageid=?;"),
0237                            info.id(), &values);
0238 
0239             if (values.isEmpty())
0240             {
0241                 // The image id does not exist -> missing fingerprint
0242 
0243                 itemIDs << info.id();
0244             }
0245             else
0246             {
0247                 // The image id exists -> if uniqueHash or modificationDate differ, we need a new fingerprint.
0248 
0249                 if (values.size() == 2)
0250                 {
0251                     QDateTime modDateTime = values.at(0).toDateTime();
0252                     modDateTime.setTimeSpec(Qt::UTC);
0253 
0254                     if ((modDateTime             != info.modDateTime()) ||
0255                         (values.at(1).toString() != info.uniqueHash()))
0256                     {
0257                         itemIDs << info.id();
0258                     }
0259                 }
0260             }
0261         }
0262     }
0263 
0264     return itemIDs;
0265 }
0266 
0267 QStringList SimilarityDb::getDirtyOrMissingFingerprintURLs(const QList<ItemInfo>& imageInfos,
0268                                                            FuzzyAlgorithm algorithm)
0269 {
0270     QStringList urls;
0271 
0272     if (algorithm == FuzzyAlgorithm::Haar)
0273     {
0274         Q_FOREACH (const ItemInfo& info, imageInfos)
0275         {
0276             QList<QVariant> values;
0277 
0278             d->db->execSql(QString::fromUtf8("SELECT modificationDate, uniqueHash FROM ImageHaarMatrix "
0279                                              "WHERE imageid=?;"),
0280                            info.id(), &values);
0281 
0282             if (values.isEmpty())
0283             {
0284                 // The image id does not exist -> missing fingerprint
0285 
0286                 urls << info.filePath();
0287             }
0288             else
0289             {
0290                 // The image id exists -> if uniqueHash or modificationDate differ, we need a new fingerprint.
0291 
0292                 if (values.size() == 2)
0293                 {
0294                     QDateTime modDateTime = values.at(0).toDateTime();
0295                     modDateTime.setTimeSpec(Qt::UTC);
0296 
0297                     if ((modDateTime             != info.modDateTime()) ||
0298                         (values.at(1).toString() != info.uniqueHash()))
0299                     {
0300                         urls << info.filePath();
0301                     }
0302                 }
0303             }
0304         }
0305     }
0306 
0307     return urls;
0308 }
0309 
0310 void SimilarityDb::copySimilarityAttributes(qlonglong srcId, qlonglong dstId)
0311 {
0312     // Go through ImageHaarMatrix table and copy the entries
0313 
0314     d->db->execSql(QString::fromUtf8("REPLACE INTO ImageHaarMatrix "
0315                                      "(imageid, modificationDate, uniqueHash, matrix) "
0316                                      "SELECT ?, modificationDate, uniqueHash, matrix "
0317                                      " FROM ImageHaarMatrix WHERE imageid=?;"),
0318                    dstId, srcId);
0319 }
0320 
0321 
0322 void SimilarityDb::removeImageFingerprint(qlonglong imageID,
0323                                           FuzzyAlgorithm algorithm)
0324 {
0325     if (algorithm == FuzzyAlgorithm::Haar)
0326     {
0327         d->db->execSql(QString::fromUtf8("DELETE FROM ImageHaarMatrix WHERE imageid=?;"),
0328                        imageID);
0329     }
0330     else if (algorithm == FuzzyAlgorithm::TfIdf)
0331     {
0332         // TODO: Extend this when we have another algorithm in place.
0333     }
0334 
0335     // Also, remove all similarities for the image and algorithm if the backend is a MySQL DB.
0336 
0337     if (d->db->databaseType() == BdEngineBackend::DbType::MySQL)
0338     {
0339         removeImageSimilarity(imageID, algorithm);
0340     }
0341 }
0342 
0343 // ----------- Methods for image similarity table access ----------
0344 
0345 double SimilarityDb::getImageSimilarity(qlonglong imageID1, qlonglong imageID2, FuzzyAlgorithm algorithm)
0346 {
0347     if ((imageID1 < 0) || (imageID2 < 0))
0348     {
0349         return -1;
0350     }
0351 
0352     // If the image ids are the same, we return 1 which is equivalent to 100%.
0353     // We do not have to access the database here as the same image id implies
0354     // the same image and thus identity.
0355 
0356     if (imageID1 == imageID2)
0357     {
0358         return 1;
0359     }
0360 
0361     // The image ids in each entry of the ImageSimilarity table follow a partial ordering,
0362     // meaning that the first image id is smaller or equal to the second id.
0363     // This has the advantage that only one entry exists for a pair of two images and one algorithm.
0364     // Also, no disjunction is necessary in the query.
0365 
0366     QPair<qlonglong, qlonglong> orderedIds = orderIds(imageID1, imageID2);
0367 
0368     QString similarityValueString = getImageSimilarityOrdered(orderedIds.first, orderedIds.second, algorithm);
0369 
0370     // If the similarity is non-null
0371 
0372     if (!similarityValueString.isEmpty())
0373     {
0374         bool ok;
0375         double val = similarityValueString.toDouble(&ok);
0376 
0377         if (ok)
0378         {
0379             return val;
0380         }
0381 
0382         qCDebug(DIGIKAM_SIMILARITYDB_LOG()) << "The similarity value for images with ids "
0383                                             << imageID1 << " and " << imageID2
0384                                             << " for similarity algorithm with id " << (int)algorithm
0385                                             << " cannot be transformed into a double! The value is "
0386                                             << similarityValueString;
0387     }
0388 
0389     // Return the info that there is no value.
0390 
0391     return -1;
0392 }
0393 
0394 void SimilarityDb::setImageSimilarity(qlonglong imageID1, qlonglong imageID2, double value, FuzzyAlgorithm algorithm)
0395 {
0396     // We don't do anything if the image ids are identical as this is a waste of space.
0397 
0398     if (imageID1 == imageID2)
0399     {
0400         return;
0401     }
0402 
0403     // The image ids in each entry of the ImageSimilarity table follow a partial ordering,
0404     // meaning that the first image id is smaller or equal to the second id.
0405     // This has the advantage that only one entry exists for a pair of two images and one algorithm.
0406     // Also, no disjunction is necessary in the query.
0407 
0408     QPair<qlonglong, qlonglong> orderedIds = orderIds(imageID1, imageID2);
0409 
0410     //Check if entry already exists for above pair of images.(Avoiding duplicate entries)
0411 
0412     QString similarityValueString          = getImageSimilarityOrdered(orderedIds.first,
0413                                                                        orderedIds.second,
0414                                                                        algorithm);
0415 
0416     if (!similarityValueString.isEmpty())
0417     {
0418         bool ok;
0419         double val = similarityValueString.toDouble(&ok);
0420 
0421         if (ok && (val == value))
0422         {
0423             return;
0424         }
0425     }
0426 
0427     d->db->execSql(QString::fromUtf8("REPLACE INTO ImageSimilarity "
0428                                      "(imageid1, imageid2, algorithm, value) "
0429                                      " VALUES(?, ?, ?, ?);"),
0430                    orderedIds.first, orderedIds.second, (int)algorithm, value);
0431 }
0432 
0433 void SimilarityDb::removeImageSimilarity(qlonglong imageID, FuzzyAlgorithm algorithm)
0434 {
0435     d->db->execSql(QString::fromUtf8("DELETE FROM ImageSimilarity "
0436                                      "WHERE ( imageid1=? OR imageid2=? ) AND algorithm=?;"),
0437                    imageID, imageID, (int)algorithm);
0438 }
0439 
0440 void SimilarityDb::removeImageSimilarity(qlonglong imageID1, qlonglong imageID2, FuzzyAlgorithm algorithm)
0441 {
0442     QPair<qlonglong, qlonglong> orderedIds = orderIds(imageID1, imageID2);
0443 
0444     d->db->execSql(QString::fromUtf8("DELETE FROM ImageSimilarity "
0445                                      "WHERE imageid1=? AND imageid2=? AND algorithm=?;"),
0446                    orderedIds.first, orderedIds.second, (int)algorithm);
0447 }
0448 
0449 void SimilarityDb::clearImageSimilarity(FuzzyAlgorithm algorithm)
0450 {
0451     d->db->execSql(QString::fromUtf8("DELETE FROM ImageSimilarity "
0452                                      "WHERE algorithm=?;"),
0453                    (int)algorithm);
0454 }
0455 
0456 QList<FuzzyAlgorithm> SimilarityDb::getImageSimilarityAlgorithms(qlonglong imageID1, qlonglong imageID2)
0457 {
0458 
0459     QPair<qlonglong, qlonglong> orderedIds = orderIds(imageID1, imageID2);
0460 
0461     QList<QVariant> values;
0462 
0463     d->db->execSql(QString::fromUtf8("SELECT algorithm FROM ImageSimilarity "
0464                                      "WHERE imageid1=? AND imageid2=?;"),
0465                    orderedIds.first, orderedIds.second, &values);
0466 
0467     QList<FuzzyAlgorithm> algorithms;
0468 
0469     Q_FOREACH (const QVariant& var, values)
0470     {
0471         int algorithmId = var.toInt();
0472 
0473         if      (algorithmId == 1)
0474         {
0475             algorithms << FuzzyAlgorithm::Haar;
0476         }
0477         else if (algorithmId == 2)
0478         {
0479             algorithms << FuzzyAlgorithm::TfIdf;
0480         }
0481     }
0482 
0483     return algorithms;
0484 }
0485 
0486 // ----------- Database shrinking and integrity check methods ----------
0487 
0488 bool SimilarityDb::integrityCheck()
0489 {
0490     QList<QVariant> values;
0491     d->db->execDBAction(d->db->getDBAction(QString::fromUtf8("checkSimilarityDbIntegrity")), &values);
0492 
0493     switch (d->db->databaseType())
0494     {
0495         case BdEngineBackend::DbType::SQLite:
0496         {
0497             // For SQLite the integrity check returns a single row with one string column "ok" on success and multiple rows on error.
0498 
0499             return ((values.size() == 1) && (values.first().toString().toLower().compare(QLatin1String("ok")) == 0));
0500         }
0501         case BdEngineBackend::DbType::MySQL:
0502         {
0503             // For MySQL, for every checked table, the table name, operation (check), message type (status) and the message text (ok on success)
0504             // are returned. So we check if there are four elements and if yes, whether the fourth element is "ok".
0505             //qCDebug(DIGIKAM_DATABASE_LOG) << "MySQL check returned " << values.size() << " rows";
0506 
0507             if ((values.size() % 4) != 0)
0508             {
0509                 return false;
0510             }
0511 
0512             for (QList<QVariant>::iterator it = values.begin() ; it != values.end() ; )
0513             {
0514                 QString tableName   = (*it).toString();
0515                 ++it;
0516                 QString operation   = (*it).toString();
0517                 ++it;
0518                 QString messageType = (*it).toString();
0519                 ++it;
0520                 QString messageText = (*it).toString();
0521                 ++it;
0522 
0523                 if (messageText.toLower().compare(QLatin1String("ok")) != 0)
0524                 {
0525                     qCDebug(DIGIKAM_DATABASE_LOG) << "Failed integrity check for table "
0526                                                   << tableName << ". Reason:" << messageText;
0527                     return false;
0528                 }
0529                 else
0530                 {
0531 /*
0532                     qCDebug(DIGIKAM_DATABASE_LOG) << "Passed integrity check for table "
0533                                                   << tableName;
0534 */
0535                 }
0536             }
0537 
0538             // No error conditions. Db passed the integrity check.
0539 
0540             return true;
0541         }
0542         default:
0543         {
0544             return false;
0545         }
0546     }
0547 }
0548 
0549 void SimilarityDb::vacuum()
0550 {
0551     d->db->execDBAction(d->db->getDBAction(QString::fromUtf8("vacuumSimilarityDB")));
0552 }
0553 
0554 // ----------- Private methods ----------
0555 
0556 QPair<qlonglong, qlonglong> SimilarityDb::orderIds(qlonglong id1, qlonglong id2)
0557 {
0558     QPair<qlonglong, qlonglong> ordered;
0559 
0560     if (id1 <= id2)
0561     {
0562         // If the first id is smaller or equal to the second, set it as first.
0563 
0564         ordered.first  = id1;
0565         ordered.second = id2;
0566     }
0567     else
0568     {
0569         ordered.first  = id2;
0570         ordered.second = id1;
0571     }
0572 
0573     return ordered;
0574 }
0575 
0576 QString SimilarityDb::getImageSimilarityOrdered(qlonglong imageID1, qlonglong imageID2, FuzzyAlgorithm algorithm)
0577 {
0578     QList<QVariant> values;
0579 
0580     d->db->execSql(QString::fromUtf8("SELECT value FROM ImageSimilarity "
0581                                      "WHERE ( imageid1=? OR imageid2=? ) AND algorithm=?;"),
0582                    imageID1, imageID2, (int)algorithm, &values);
0583 
0584     if (values.isEmpty())
0585     {
0586         return QString();
0587     }
0588 
0589     return values.first().toString();
0590 }
0591 
0592 } // namespace Digikam