File indexing completed on 2025-01-05 03:54:13
0001 /* ============================================================ 0002 * 0003 * This file is a part of digiKam project 0004 * https://www.digikam.org 0005 * 0006 * Date : 2017-07-08 0007 * Description : Similarity database interface. 0008 * 0009 * SPDX-FileCopyrightText: 2009 by Marcel Wiesweg <marcel dot wiesweg at gmx dot de> 0010 * SPDX-FileCopyrightText: 2009-2024 by Gilles Caulier <caulier dot gilles at gmail dot com> 0011 * SPDX-FileCopyrightText: 2017 by Swati Lodha <swatilodha27 at gmail dot com> 0012 * SPDX-FileCopyrightText: 2018 by Mario Frank <mario dot frank at uni minus potsdam dot de> 0013 * 0014 * SPDX-License-Identifier: GPL-2.0-or-later 0015 * 0016 * ============================================================ */ 0017 0018 #include "similaritydb.h" 0019 0020 // Local includes 0021 0022 #include "digikam_debug.h" 0023 0024 namespace Digikam 0025 { 0026 0027 class Q_DECL_HIDDEN SimilarityDb::Private 0028 { 0029 0030 public: 0031 0032 explicit Private() : 0033 db(nullptr) 0034 { 0035 } 0036 0037 SimilarityDbBackend* db; 0038 }; 0039 0040 SimilarityDb::SimilarityDb(SimilarityDbBackend* const backend) 0041 : d(new Private) 0042 { 0043 d->db = backend; 0044 } 0045 0046 SimilarityDb::~SimilarityDb() 0047 { 0048 delete d; 0049 } 0050 0051 bool SimilarityDb::setSetting(const QString& keyword, const QString& value ) 0052 { 0053 QMap<QString, QVariant> parameters; 0054 parameters.insert(QLatin1String(":keyword"), keyword); 0055 parameters.insert(QLatin1String(":value"), value); 0056 0057 BdEngineBackend::QueryState queryStateResult = 0058 d->db->execDBAction(d->db->getDBAction(QString::fromUtf8("ReplaceSimilaritySetting")), 0059 parameters); 0060 0061 return (queryStateResult == BdEngineBackend::NoErrors); 0062 } 0063 0064 QString SimilarityDb::getSetting(const QString& keyword) 0065 { 0066 QMap<QString, QVariant> parameters; 0067 parameters.insert(QLatin1String(":keyword"), keyword); 0068 QList<QVariant> values; 0069 0070 // TODO Should really check return status here 0071 0072 BdEngineBackend::QueryState queryStateResult = 0073 d->db->execDBAction(d->db->getDBAction(QString::fromUtf8("SelectSimilaritySetting")), 0074 parameters, &values); 0075 qCDebug(DIGIKAM_SIMILARITYDB_LOG) << "SimilarityDb SelectSimilaritySetting val ret = " 0076 << (BdEngineBackend::QueryStateEnum)queryStateResult; 0077 0078 if (values.isEmpty()) 0079 { 0080 return QString(); 0081 } 0082 0083 return values.first().toString(); 0084 } 0085 0086 QString SimilarityDb::getLegacySetting(const QString& keyword) 0087 { 0088 QMap<QString, QVariant> parameters; 0089 parameters.insert(QLatin1String(":keyword"), keyword); 0090 QList<QVariant> values; 0091 0092 // TODO Should really check return status here 0093 0094 BdEngineBackend::QueryState queryStateResult = 0095 d->db->execDBAction(d->db->getDBAction(QString::fromUtf8("SelectSimilarityLegacySetting")), 0096 parameters, &values); 0097 qCDebug(DIGIKAM_SIMILARITYDB_LOG) << "SimilarityDb SelectSimilaritySetting val ret = " 0098 << (BdEngineBackend::QueryStateEnum)queryStateResult; 0099 0100 if (values.isEmpty()) 0101 { 0102 return QString(); 0103 } 0104 0105 return values.first().toString(); 0106 } 0107 0108 // ----------- General methods for entry access ---------- 0109 0110 QSet<qlonglong> SimilarityDb::registeredImageIds() const 0111 { 0112 QSet<qlonglong> imageIds; 0113 QList<QVariant> values; 0114 0115 // Get all image ids from the first and second imageid column of the ImageSimilarity table. 0116 0117 d->db->execSql(QString::fromUtf8("SELECT imageid1, imageid2 FROM ImageSimilarity;"), 0118 &values); 0119 0120 for (QList<QVariant>::const_iterator it = values.constBegin() ; it != values.constEnd() ; ) 0121 { 0122 imageIds << (*it).toLongLong(); 0123 ++it; 0124 imageIds << (*it).toLongLong(); 0125 ++it; 0126 } 0127 0128 values.clear(); 0129 0130 // get all image ids from the ImageHaarMatrix table. 0131 0132 d->db->execSql(QString::fromUtf8("SELECT imageid FROM ImageHaarMatrix;"), 0133 &values); 0134 0135 Q_FOREACH (const QVariant& var, values) 0136 { 0137 imageIds << var.toLongLong(); 0138 } 0139 0140 return imageIds; 0141 } 0142 0143 // ----------- Methods for fingerprint (ImageHaarMatrix) table access ---------- 0144 0145 bool SimilarityDb::hasFingerprint(qlonglong imageId, FuzzyAlgorithm algorithm) const 0146 { 0147 if (algorithm == FuzzyAlgorithm::Haar) 0148 { 0149 QList<QVariant> values; 0150 0151 d->db->execSql(QString::fromUtf8("SELECT imageid FROM ImageHaarMatrix " 0152 "WHERE matrix IS NOT NULL AND imageid=? LIMIT 1;"), 0153 imageId, &values); 0154 0155 // return true if there is at least one fingerprint 0156 0157 return !values.isEmpty(); 0158 } 0159 0160 return false; 0161 } 0162 0163 0164 bool SimilarityDb::hasFingerprints() 0165 { 0166 return hasFingerprints(FuzzyAlgorithm::Haar); 0167 } 0168 0169 bool SimilarityDb::hasFingerprints(FuzzyAlgorithm algorithm) const 0170 { 0171 if (algorithm == FuzzyAlgorithm::Haar) 0172 { 0173 QList<QVariant> values; 0174 0175 d->db->execSql(QString::fromUtf8("SELECT imageid FROM ImageHaarMatrix " 0176 "WHERE matrix IS NOT NULL LIMIT 1;"), 0177 &values); 0178 0179 // return true if there is at least one fingerprint 0180 0181 return !values.isEmpty(); 0182 } 0183 0184 return false; 0185 } 0186 0187 bool SimilarityDb::hasDirtyOrMissingFingerprint(const ItemInfo& imageInfo, FuzzyAlgorithm algorithm) const 0188 { 0189 if (algorithm == FuzzyAlgorithm::Haar) 0190 { 0191 QList<QVariant> values; 0192 0193 d->db->execSql(QString::fromUtf8("SELECT modificationDate, uniqueHash FROM ImageHaarMatrix " 0194 "WHERE imageid=?;"), 0195 imageInfo.id(), &values); 0196 0197 if (values.isEmpty()) 0198 { 0199 // The image id does not exist -> missing fingerprint 0200 0201 return true; 0202 } 0203 else 0204 { 0205 // The image id exists -> if uniqueHash or modificationDate differ, we need a new fingerprint. 0206 0207 if (values.size() == 2) 0208 { 0209 QDateTime modDateTime = values.at(0).toDateTime(); 0210 modDateTime.setTimeSpec(Qt::UTC); 0211 0212 if ((modDateTime != imageInfo.modDateTime()) || 0213 (values.at(1).toString() != imageInfo.uniqueHash())) 0214 { 0215 return true; 0216 } 0217 } 0218 } 0219 } 0220 0221 return false; 0222 } 0223 0224 QList<qlonglong> SimilarityDb::getDirtyOrMissingFingerprints(const QList<ItemInfo>& imageInfos, 0225 FuzzyAlgorithm algorithm) 0226 { 0227 QList<qlonglong> itemIDs; 0228 0229 if (algorithm == FuzzyAlgorithm::Haar) 0230 { 0231 Q_FOREACH (const ItemInfo& info, imageInfos) 0232 { 0233 QList<QVariant> values; 0234 0235 d->db->execSql(QString::fromUtf8("SELECT modificationDate, uniqueHash FROM ImageHaarMatrix " 0236 "WHERE imageid=?;"), 0237 info.id(), &values); 0238 0239 if (values.isEmpty()) 0240 { 0241 // The image id does not exist -> missing fingerprint 0242 0243 itemIDs << info.id(); 0244 } 0245 else 0246 { 0247 // The image id exists -> if uniqueHash or modificationDate differ, we need a new fingerprint. 0248 0249 if (values.size() == 2) 0250 { 0251 QDateTime modDateTime = values.at(0).toDateTime(); 0252 modDateTime.setTimeSpec(Qt::UTC); 0253 0254 if ((modDateTime != info.modDateTime()) || 0255 (values.at(1).toString() != info.uniqueHash())) 0256 { 0257 itemIDs << info.id(); 0258 } 0259 } 0260 } 0261 } 0262 } 0263 0264 return itemIDs; 0265 } 0266 0267 QStringList SimilarityDb::getDirtyOrMissingFingerprintURLs(const QList<ItemInfo>& imageInfos, 0268 FuzzyAlgorithm algorithm) 0269 { 0270 QStringList urls; 0271 0272 if (algorithm == FuzzyAlgorithm::Haar) 0273 { 0274 Q_FOREACH (const ItemInfo& info, imageInfos) 0275 { 0276 QList<QVariant> values; 0277 0278 d->db->execSql(QString::fromUtf8("SELECT modificationDate, uniqueHash FROM ImageHaarMatrix " 0279 "WHERE imageid=?;"), 0280 info.id(), &values); 0281 0282 if (values.isEmpty()) 0283 { 0284 // The image id does not exist -> missing fingerprint 0285 0286 urls << info.filePath(); 0287 } 0288 else 0289 { 0290 // The image id exists -> if uniqueHash or modificationDate differ, we need a new fingerprint. 0291 0292 if (values.size() == 2) 0293 { 0294 QDateTime modDateTime = values.at(0).toDateTime(); 0295 modDateTime.setTimeSpec(Qt::UTC); 0296 0297 if ((modDateTime != info.modDateTime()) || 0298 (values.at(1).toString() != info.uniqueHash())) 0299 { 0300 urls << info.filePath(); 0301 } 0302 } 0303 } 0304 } 0305 } 0306 0307 return urls; 0308 } 0309 0310 void SimilarityDb::copySimilarityAttributes(qlonglong srcId, qlonglong dstId) 0311 { 0312 // Go through ImageHaarMatrix table and copy the entries 0313 0314 d->db->execSql(QString::fromUtf8("REPLACE INTO ImageHaarMatrix " 0315 "(imageid, modificationDate, uniqueHash, matrix) " 0316 "SELECT ?, modificationDate, uniqueHash, matrix " 0317 " FROM ImageHaarMatrix WHERE imageid=?;"), 0318 dstId, srcId); 0319 } 0320 0321 0322 void SimilarityDb::removeImageFingerprint(qlonglong imageID, 0323 FuzzyAlgorithm algorithm) 0324 { 0325 if (algorithm == FuzzyAlgorithm::Haar) 0326 { 0327 d->db->execSql(QString::fromUtf8("DELETE FROM ImageHaarMatrix WHERE imageid=?;"), 0328 imageID); 0329 } 0330 else if (algorithm == FuzzyAlgorithm::TfIdf) 0331 { 0332 // TODO: Extend this when we have another algorithm in place. 0333 } 0334 0335 // Also, remove all similarities for the image and algorithm if the backend is a MySQL DB. 0336 0337 if (d->db->databaseType() == BdEngineBackend::DbType::MySQL) 0338 { 0339 removeImageSimilarity(imageID, algorithm); 0340 } 0341 } 0342 0343 // ----------- Methods for image similarity table access ---------- 0344 0345 double SimilarityDb::getImageSimilarity(qlonglong imageID1, qlonglong imageID2, FuzzyAlgorithm algorithm) 0346 { 0347 if ((imageID1 < 0) || (imageID2 < 0)) 0348 { 0349 return -1; 0350 } 0351 0352 // If the image ids are the same, we return 1 which is equivalent to 100%. 0353 // We do not have to access the database here as the same image id implies 0354 // the same image and thus identity. 0355 0356 if (imageID1 == imageID2) 0357 { 0358 return 1; 0359 } 0360 0361 // The image ids in each entry of the ImageSimilarity table follow a partial ordering, 0362 // meaning that the first image id is smaller or equal to the second id. 0363 // This has the advantage that only one entry exists for a pair of two images and one algorithm. 0364 // Also, no disjunction is necessary in the query. 0365 0366 QPair<qlonglong, qlonglong> orderedIds = orderIds(imageID1, imageID2); 0367 0368 QString similarityValueString = getImageSimilarityOrdered(orderedIds.first, orderedIds.second, algorithm); 0369 0370 // If the similarity is non-null 0371 0372 if (!similarityValueString.isEmpty()) 0373 { 0374 bool ok; 0375 double val = similarityValueString.toDouble(&ok); 0376 0377 if (ok) 0378 { 0379 return val; 0380 } 0381 0382 qCDebug(DIGIKAM_SIMILARITYDB_LOG()) << "The similarity value for images with ids " 0383 << imageID1 << " and " << imageID2 0384 << " for similarity algorithm with id " << (int)algorithm 0385 << " cannot be transformed into a double! The value is " 0386 << similarityValueString; 0387 } 0388 0389 // Return the info that there is no value. 0390 0391 return -1; 0392 } 0393 0394 void SimilarityDb::setImageSimilarity(qlonglong imageID1, qlonglong imageID2, double value, FuzzyAlgorithm algorithm) 0395 { 0396 // We don't do anything if the image ids are identical as this is a waste of space. 0397 0398 if (imageID1 == imageID2) 0399 { 0400 return; 0401 } 0402 0403 // The image ids in each entry of the ImageSimilarity table follow a partial ordering, 0404 // meaning that the first image id is smaller or equal to the second id. 0405 // This has the advantage that only one entry exists for a pair of two images and one algorithm. 0406 // Also, no disjunction is necessary in the query. 0407 0408 QPair<qlonglong, qlonglong> orderedIds = orderIds(imageID1, imageID2); 0409 0410 //Check if entry already exists for above pair of images.(Avoiding duplicate entries) 0411 0412 QString similarityValueString = getImageSimilarityOrdered(orderedIds.first, 0413 orderedIds.second, 0414 algorithm); 0415 0416 if (!similarityValueString.isEmpty()) 0417 { 0418 bool ok; 0419 double val = similarityValueString.toDouble(&ok); 0420 0421 if (ok && (val == value)) 0422 { 0423 return; 0424 } 0425 } 0426 0427 d->db->execSql(QString::fromUtf8("REPLACE INTO ImageSimilarity " 0428 "(imageid1, imageid2, algorithm, value) " 0429 " VALUES(?, ?, ?, ?);"), 0430 orderedIds.first, orderedIds.second, (int)algorithm, value); 0431 } 0432 0433 void SimilarityDb::removeImageSimilarity(qlonglong imageID, FuzzyAlgorithm algorithm) 0434 { 0435 d->db->execSql(QString::fromUtf8("DELETE FROM ImageSimilarity " 0436 "WHERE ( imageid1=? OR imageid2=? ) AND algorithm=?;"), 0437 imageID, imageID, (int)algorithm); 0438 } 0439 0440 void SimilarityDb::removeImageSimilarity(qlonglong imageID1, qlonglong imageID2, FuzzyAlgorithm algorithm) 0441 { 0442 QPair<qlonglong, qlonglong> orderedIds = orderIds(imageID1, imageID2); 0443 0444 d->db->execSql(QString::fromUtf8("DELETE FROM ImageSimilarity " 0445 "WHERE imageid1=? AND imageid2=? AND algorithm=?;"), 0446 orderedIds.first, orderedIds.second, (int)algorithm); 0447 } 0448 0449 void SimilarityDb::clearImageSimilarity(FuzzyAlgorithm algorithm) 0450 { 0451 d->db->execSql(QString::fromUtf8("DELETE FROM ImageSimilarity " 0452 "WHERE algorithm=?;"), 0453 (int)algorithm); 0454 } 0455 0456 QList<FuzzyAlgorithm> SimilarityDb::getImageSimilarityAlgorithms(qlonglong imageID1, qlonglong imageID2) 0457 { 0458 0459 QPair<qlonglong, qlonglong> orderedIds = orderIds(imageID1, imageID2); 0460 0461 QList<QVariant> values; 0462 0463 d->db->execSql(QString::fromUtf8("SELECT algorithm FROM ImageSimilarity " 0464 "WHERE imageid1=? AND imageid2=?;"), 0465 orderedIds.first, orderedIds.second, &values); 0466 0467 QList<FuzzyAlgorithm> algorithms; 0468 0469 Q_FOREACH (const QVariant& var, values) 0470 { 0471 int algorithmId = var.toInt(); 0472 0473 if (algorithmId == 1) 0474 { 0475 algorithms << FuzzyAlgorithm::Haar; 0476 } 0477 else if (algorithmId == 2) 0478 { 0479 algorithms << FuzzyAlgorithm::TfIdf; 0480 } 0481 } 0482 0483 return algorithms; 0484 } 0485 0486 // ----------- Database shrinking and integrity check methods ---------- 0487 0488 bool SimilarityDb::integrityCheck() 0489 { 0490 QList<QVariant> values; 0491 d->db->execDBAction(d->db->getDBAction(QString::fromUtf8("checkSimilarityDbIntegrity")), &values); 0492 0493 switch (d->db->databaseType()) 0494 { 0495 case BdEngineBackend::DbType::SQLite: 0496 { 0497 // For SQLite the integrity check returns a single row with one string column "ok" on success and multiple rows on error. 0498 0499 return ((values.size() == 1) && (values.first().toString().toLower().compare(QLatin1String("ok")) == 0)); 0500 } 0501 case BdEngineBackend::DbType::MySQL: 0502 { 0503 // For MySQL, for every checked table, the table name, operation (check), message type (status) and the message text (ok on success) 0504 // are returned. So we check if there are four elements and if yes, whether the fourth element is "ok". 0505 //qCDebug(DIGIKAM_DATABASE_LOG) << "MySQL check returned " << values.size() << " rows"; 0506 0507 if ((values.size() % 4) != 0) 0508 { 0509 return false; 0510 } 0511 0512 for (QList<QVariant>::iterator it = values.begin() ; it != values.end() ; ) 0513 { 0514 QString tableName = (*it).toString(); 0515 ++it; 0516 QString operation = (*it).toString(); 0517 ++it; 0518 QString messageType = (*it).toString(); 0519 ++it; 0520 QString messageText = (*it).toString(); 0521 ++it; 0522 0523 if (messageText.toLower().compare(QLatin1String("ok")) != 0) 0524 { 0525 qCDebug(DIGIKAM_DATABASE_LOG) << "Failed integrity check for table " 0526 << tableName << ". Reason:" << messageText; 0527 return false; 0528 } 0529 else 0530 { 0531 /* 0532 qCDebug(DIGIKAM_DATABASE_LOG) << "Passed integrity check for table " 0533 << tableName; 0534 */ 0535 } 0536 } 0537 0538 // No error conditions. Db passed the integrity check. 0539 0540 return true; 0541 } 0542 default: 0543 { 0544 return false; 0545 } 0546 } 0547 } 0548 0549 void SimilarityDb::vacuum() 0550 { 0551 d->db->execDBAction(d->db->getDBAction(QString::fromUtf8("vacuumSimilarityDB"))); 0552 } 0553 0554 // ----------- Private methods ---------- 0555 0556 QPair<qlonglong, qlonglong> SimilarityDb::orderIds(qlonglong id1, qlonglong id2) 0557 { 0558 QPair<qlonglong, qlonglong> ordered; 0559 0560 if (id1 <= id2) 0561 { 0562 // If the first id is smaller or equal to the second, set it as first. 0563 0564 ordered.first = id1; 0565 ordered.second = id2; 0566 } 0567 else 0568 { 0569 ordered.first = id2; 0570 ordered.second = id1; 0571 } 0572 0573 return ordered; 0574 } 0575 0576 QString SimilarityDb::getImageSimilarityOrdered(qlonglong imageID1, qlonglong imageID2, FuzzyAlgorithm algorithm) 0577 { 0578 QList<QVariant> values; 0579 0580 d->db->execSql(QString::fromUtf8("SELECT value FROM ImageSimilarity " 0581 "WHERE ( imageid1=? OR imageid2=? ) AND algorithm=?;"), 0582 imageID1, imageID2, (int)algorithm, &values); 0583 0584 if (values.isEmpty()) 0585 { 0586 return QString(); 0587 } 0588 0589 return values.first().toString(); 0590 } 0591 0592 } // namespace Digikam