// File indexing completed on 2025-01-05 03:54:03
0001 /* ============================================================ 0002 * 0003 * This file is a part of digiKam project 0004 * https://www.digikam.org 0005 * 0006 * Date : 2003-01-17 0007 * Description : Haar Database interface 0008 * 0009 * SPDX-FileCopyrightText: 2016-2018 by Mario Frank <mario dot frank at uni minus potsdam dot de> 0010 * SPDX-FileCopyrightText: 2003 by Ricardo Niederberger Cabral <nieder at mail dot ru> 0011 * SPDX-FileCopyrightText: 2009-2024 by Gilles Caulier <caulier dot gilles at gmail dot com> 0012 * SPDX-FileCopyrightText: 2009-2013 by Marcel Wiesweg <marcel dot wiesweg at gmx dot de> 0013 * SPDX-FileCopyrightText: 2009-2011 by Andi Clemens <andi dot clemens at gmail dot com> 0014 * 0015 * SPDX-License-Identifier: GPL-2.0-or-later 0016 * 0017 * ============================================================ */ 0018 0019 #include "haariface_p.h" 0020 0021 #define ENABLE_DEBUG_DUPLICATES 0 0022 #if ENABLE_DEBUG_DUPLICATES 0023 # define DEBUG_DUPLICATES(x) qCDebug(DIGIKAM_DATABASE_LOG) << x; 0024 #else 0025 # define DEBUG_DUPLICATES(x) 0026 #endif 0027 0028 namespace Digikam 0029 { 0030 0031 HaarIface::HaarIface() 0032 : d(new Private()) 0033 { 0034 qRegisterMetaType<DuplicatesResultsMap>("HaarIface::DuplicatesResultsMap"); 0035 } 0036 0037 HaarIface::HaarIface(const QSet<qlonglong>& images2Scan) 0038 : HaarIface() 0039 { 0040 d->rebuildSignatureCache(images2Scan); 0041 } 0042 0043 HaarIface::~HaarIface() 0044 { 0045 delete d; 0046 } 0047 0048 void HaarIface::setAlbumRootsToSearch(const QList<int>& albumRootIds) 0049 { 0050 setAlbumRootsToSearch(QSet<int>(albumRootIds.begin(), albumRootIds.end())); 0051 } 0052 0053 void HaarIface::setAlbumRootsToSearch(const QSet<int>& albumRootIds) 0054 { 0055 d->setAlbumRootsToSearch(albumRootIds); 0056 } 0057 0058 int HaarIface::preferredSize() 0059 { 0060 return Haar::NumberOfPixels; 0061 } 0062 0063 bool HaarIface::indexImage(const QString& filename) 0064 { 0065 QImage image = loadQImage(filename); 0066 
0067 if (image.isNull()) 0068 { 0069 return false; 0070 } 0071 0072 return indexImage(filename, image); 0073 } 0074 0075 bool HaarIface::indexImage(const QString& filename, const QImage& image) 0076 { 0077 ItemInfo info = ItemInfo::fromLocalFile(filename); 0078 0079 if (info.isNull()) 0080 { 0081 return false; 0082 } 0083 0084 return indexImage(info.id(), image); 0085 } 0086 0087 bool HaarIface::indexImage(const QString& filename, const DImg& image) 0088 { 0089 ItemInfo info = ItemInfo::fromLocalFile(filename); 0090 0091 if (info.isNull()) 0092 { 0093 return false; 0094 } 0095 0096 return indexImage(info.id(), image); 0097 } 0098 0099 bool HaarIface::indexImage(qlonglong imageid, const QImage& image) 0100 { 0101 if (image.isNull()) 0102 { 0103 return false; 0104 } 0105 0106 d->setImageDataFromImage(image); 0107 0108 return indexImage(imageid); 0109 } 0110 0111 bool HaarIface::indexImage(qlonglong imageid, const DImg& image) 0112 { 0113 if (image.isNull()) 0114 { 0115 return false; 0116 } 0117 0118 d->setImageDataFromImage(image); 0119 0120 return indexImage(imageid); 0121 } 0122 0123 // NOTE: private method: d->m_data has been filled 0124 0125 bool HaarIface::indexImage(qlonglong imageid) 0126 { 0127 Haar::Calculator haar; 0128 haar.transform(d->imageData()); 0129 0130 Haar::SignatureData sig; 0131 haar.calcHaar(d->imageData(), &sig); 0132 0133 // Store main entry 0134 0135 DatabaseBlob blob; 0136 QByteArray array = blob.write(sig); 0137 0138 ItemInfo info(imageid); 0139 0140 if (!info.isNull() && info.isVisible()) 0141 { 0142 SimilarityDbAccess().backend()->execSql(QString::fromUtf8("REPLACE INTO ImageHaarMatrix " 0143 " (imageid, modificationDate, uniqueHash, matrix) " 0144 " VALUES(?, ?, ?, ?);"), 0145 imageid, info.modDateTime(), info.uniqueHash(), array); 0146 } 0147 0148 return true; 0149 } 0150 0151 QString HaarIface::signatureAsText(const QImage& image) 0152 { 0153 d->setImageDataFromImage(image); 0154 0155 Haar::Calculator haar; 0156 
haar.transform(d->imageData()); 0157 Haar::SignatureData sig; 0158 haar.calcHaar(d->imageData(), &sig); 0159 0160 DatabaseBlob blob; 0161 QByteArray array = blob.write(sig); 0162 0163 return QString::fromUtf8(array.toBase64()); 0164 } 0165 0166 QPair<double, QMap<qlonglong, double> > HaarIface::bestMatchesForImageWithThreshold(const QString& imagePath, 0167 double requiredPercentage, 0168 double maximumPercentage, 0169 const QList<int>& targetAlbums, 0170 DuplicatesSearchRestrictions 0171 searchResultRestriction, 0172 SketchType type) 0173 { 0174 DImg image(imagePath); 0175 0176 if (image.isNull()) 0177 { 0178 return QPair<double, QMap<qlonglong, double> >(); 0179 } 0180 0181 d->setImageDataFromImage(image); 0182 0183 Haar::Calculator haar; 0184 haar.transform(d->imageData()); 0185 Haar::SignatureData sig; 0186 haar.calcHaar(d->imageData(), &sig); 0187 0188 // Remove all previous similarities from pictures 0189 0190 SimilarityDbAccess().db()->removeImageSimilarity(0); 0191 0192 // Apply duplicates search for the image. Use the image id 0 which cannot be present. 
0193 0194 return bestMatchesWithThreshold(0, 0195 &sig, 0196 requiredPercentage, 0197 maximumPercentage, 0198 targetAlbums, 0199 searchResultRestriction, 0200 type); 0201 } 0202 0203 QPair<double, QMap<qlonglong, double> > HaarIface::bestMatchesForImageWithThreshold(qlonglong imageId, 0204 double requiredPercentage, 0205 double maximumPercentage, 0206 const QList<int>& targetAlbums, 0207 DuplicatesSearchRestrictions 0208 searchResultRestriction, 0209 SketchType type) 0210 { 0211 Haar::SignatureData sig; 0212 0213 if (d->hasSignatureCache()) 0214 { 0215 if (!d->retrieveSignatureFromCache(imageId, sig)) 0216 { 0217 return {}; 0218 } 0219 } 0220 else 0221 { 0222 if (!retrieveSignatureFromDB(imageId, sig)) 0223 { 0224 return {}; 0225 } 0226 } 0227 0228 return bestMatchesWithThreshold(imageId, 0229 &sig, 0230 requiredPercentage, 0231 maximumPercentage, 0232 targetAlbums, 0233 searchResultRestriction, 0234 type); 0235 } 0236 0237 QMap<qlonglong, double> HaarIface::bestMatchesForSignature(const QString& signature, 0238 const QList<int>& targetAlbums, 0239 int numberOfResults, 0240 SketchType type) 0241 { 0242 QByteArray bytes = QByteArray::fromBase64(signature.toLatin1()); 0243 0244 DatabaseBlob blobReader; 0245 Haar::SignatureData sig; 0246 blobReader.read(bytes, sig); 0247 0248 // Get all matching images with their score and save their similarity to the signature, i.e. id -2 0249 0250 QMultiMap<double, qlonglong> matches = bestMatches(&sig, numberOfResults, targetAlbums, type); 0251 QMap<qlonglong, double> result; 0252 0253 for (QMultiMap<double, qlonglong>::const_iterator it = matches.constBegin() ; 0254 it != matches.constEnd() ; ++it) 0255 { 0256 // Add the image id and the normalised score (make sure that it is positive and between 0 and 1. 
0257 0258 result.insert(it.value(), (0.0 - (it.key() / 100))); 0259 } 0260 0261 return result; 0262 } 0263 0264 QMultiMap<double, qlonglong> HaarIface::bestMatches(Haar::SignatureData* const querySig, 0265 int numberOfResults, 0266 const QList<int>& targetAlbums, 0267 SketchType type) 0268 { 0269 QMap<qlonglong, double> scores = searchDatabase(querySig, type, targetAlbums); 0270 0271 // Find out the best matches, those with the lowest score 0272 // We make use of the feature that QMap keys are sorted in ascending order 0273 // Of course, images can have the same score, so we need a multi map 0274 0275 QMultiMap<double, qlonglong> bestMatches; 0276 bool initialFill = false; 0277 double score, worstScore, bestScore; 0278 qlonglong id; 0279 0280 for (QMap<qlonglong, double>::const_iterator it = scores.constBegin() ; 0281 it != scores.constEnd() ; ++it) 0282 { 0283 score = it.value(); 0284 id = it.key(); 0285 0286 if (!initialFill) 0287 { 0288 // as long as the maximum number of results is not reached, just fill up the map 0289 0290 bestMatches.insert(score, id); 0291 initialFill = (bestMatches.size() >= numberOfResults); 0292 } 0293 else 0294 { 0295 // find the last entry, the one with the highest (=worst) score 0296 0297 QMultiMap<double, qlonglong>::iterator last = bestMatches.end(); 0298 --last; 0299 worstScore = last.key(); 0300 0301 // if the new entry has a higher score, put it in the list and remove that last one 0302 0303 if (score < worstScore) 0304 { 0305 bestMatches.erase(last); 0306 bestMatches.insert(score, id); 0307 } 0308 else if (score == worstScore) 0309 { 0310 bestScore = bestMatches.begin().key(); 0311 0312 // if the score is identical for all entries, increase the maximum result number 0313 0314 if (score == bestScore) 0315 { 0316 bestMatches.insert(score, id); 0317 } 0318 } 0319 } 0320 } 0321 0322 /* 0323 for (QMap<double, qlonglong>::iterator it = bestMatches.begin(); it != bestMatches.end(); ++it) 0324 { 0325 qCDebug(DIGIKAM_DATABASE_LOG) << 
it.key() << it.value(); 0326 } 0327 */ 0328 0329 return bestMatches; 0330 } 0331 0332 QPair<double, QMap<qlonglong, double> > HaarIface::bestMatchesWithThreshold(qlonglong imageid, 0333 Haar::SignatureData* const querySig, 0334 double requiredPercentage, 0335 double maximumPercentage, 0336 const QList<int>& targetAlbums, 0337 DuplicatesSearchRestrictions 0338 searchResultRestriction, 0339 SketchType type) 0340 { 0341 int albumId = CoreDbAccess().db()->getItemAlbum(imageid); 0342 QMap<qlonglong, double> scores = searchDatabase(querySig, 0343 type, 0344 targetAlbums, 0345 searchResultRestriction, 0346 imageid, 0347 albumId); 0348 double lowest, highest; 0349 getBestAndWorstPossibleScore(querySig, type, &lowest, &highest); 0350 0351 // The range between the highest (worst) and lowest (best) score 0352 // example: 0.2 and 0.5 -> 0.3 0353 0354 double scoreRange = highest - lowest; 0355 0356 // The lower the requiredPercentage is, the higher will the result be. 0357 // example: 0.7 -> 0.3 0358 0359 double percentageRange = 1.0 - requiredPercentage; 0360 0361 // example: 0.2 + (0.3 * 0.3) = 0.2 + 0.09 = 0.29 0362 0363 double requiredScore = lowest + scoreRange * percentageRange; 0364 0365 // Set the supremum which solves the problem that if 0366 // required == maximum, no results will be returned. 0367 // Eg, id required == maximum == 50.0, only images with exactly this 0368 // similarity are returned. But users expect also to see images 0369 // with similarity 50,x. 
0370 0371 double supremum = (floor(maximumPercentage * 100 + 1.0)) / 100; 0372 0373 QMap<qlonglong, double> bestMatches; 0374 double score, percentage, avgPercentage = 0.0; 0375 QPair<double, QMap<qlonglong, double> > result; 0376 qlonglong id; 0377 0378 for (QMap<qlonglong, double>::const_iterator it = scores.constBegin() ; 0379 it != scores.constEnd() ; ++it) 0380 { 0381 score = it.value(); 0382 id = it.key(); 0383 0384 // If the score of the picture is at most the required (maximum) score and 0385 0386 if (score <= requiredScore) 0387 { 0388 percentage = 1.0 - (score - lowest) / scoreRange; 0389 0390 // If the found image is the original one (check by id) or the percentage is below the maximum. 0391 0392 if ((id == imageid) || (percentage < supremum)) 0393 { 0394 bestMatches.insert(id, percentage); 0395 0396 // If the current image is not the original, use the images similarity for the average percentage 0397 // Also, save the similarity of the found image to the original image. 0398 0399 if (id != imageid) 0400 { 0401 // Store the similarity if the reference image has a valid image id 0402 0403 if (imageid > 0) 0404 { 0405 SimilarityDbAccess().db()->setImageSimilarity(id, imageid, percentage); 0406 } 0407 0408 avgPercentage += percentage; 0409 } 0410 } 0411 } 0412 } 0413 0414 // Debug output 0415 0416 if (bestMatches.count() > 1) 0417 { 0418 // The average percentage is the sum of all percentages 0419 // (without the original picture) divided by the count of pictures -1. 0420 // Subtracting 1 is necessary since the original picture is not used for the calculation. 
0421 0422 avgPercentage = avgPercentage / (bestMatches.count() - 1); 0423 0424 qCDebug(DIGIKAM_DATABASE_LOG) << "Duplicates with id and score:"; 0425 0426 for (QMap<qlonglong, double>::const_iterator it = bestMatches.constBegin() ; it != bestMatches.constEnd() ; ++it) 0427 { 0428 qCDebug(DIGIKAM_DATABASE_LOG) << it.key() << QString::number(it.value() * 100) + QLatin1Char('%'); 0429 } 0430 } 0431 0432 result.first = avgPercentage; 0433 result.second = bestMatches; 0434 0435 return result; 0436 } 0437 0438 bool HaarIface::fulfillsRestrictions(qlonglong imageId, int albumId, 0439 qlonglong originalImageId, 0440 int originalAlbumId, 0441 const QList<int>& targetAlbums, 0442 DuplicatesSearchRestrictions searchResultRestriction) 0443 { 0444 if (imageId == originalImageId) 0445 { 0446 return true; 0447 } 0448 else if (targetAlbums.isEmpty() || targetAlbums.contains(albumId)) 0449 { 0450 return ( searchResultRestriction == None) || 0451 ((searchResultRestriction == SameAlbum) && (originalAlbumId == albumId)) || 0452 ((searchResultRestriction == DifferentAlbum) && (originalAlbumId != albumId)); 0453 } 0454 else 0455 { 0456 return false; 0457 } 0458 } 0459 0460 QMap<qlonglong, double> HaarIface::searchDatabase(Haar::SignatureData* const querySig, 0461 SketchType type, const QList<int>& targetAlbums, 0462 DuplicatesSearchRestrictions searchResultRestriction, 0463 qlonglong originalImageId, 0464 int originalAlbumId) 0465 { 0466 // The table of constant weight factors applied to each channel and the weight bin 0467 0468 Haar::Weights weights((Haar::Weights::SketchType)type); 0469 0470 // layout the query signature for fast lookup 0471 0472 Haar::SignatureMap queryMapY, queryMapI, queryMapQ; 0473 queryMapY.fill(querySig->sig[0]); 0474 queryMapI.fill(querySig->sig[1]); 0475 queryMapQ.fill(querySig->sig[2]); 0476 std::reference_wrapper<Haar::SignatureMap> queryMaps[3] = { queryMapY, queryMapI, queryMapQ }; 0477 0478 // Map imageid -> score. Lowest score is best. 
0479 // any newly inserted value will be initialized with a score of 0, as required 0480 0481 QMap<qlonglong, double> scores; 0482 0483 // if no cache is used or the cache signature map is empty, query the database 0484 0485 if (!d->hasSignatureCache()) 0486 { 0487 d->rebuildSignatureCache(); 0488 } 0489 0490 for (auto it = d->signatureCache()->constBegin() ; it != d->signatureCache()->constEnd() ; ++it) 0491 { 0492 // If the image is the original one or 0493 // No restrictions apply or 0494 // SameAlbum restriction applies and the albums are equal or 0495 // DifferentAlbum restriction applies and the albums differ 0496 // then calculate the score. 0497 0498 const qlonglong& imageId = it.key(); 0499 0500 if (fulfillsRestrictions(imageId, d->albumCache()->value(imageId), originalImageId, 0501 originalAlbumId, targetAlbums, searchResultRestriction)) 0502 { 0503 const Haar::SignatureData& data = it.value(); 0504 scores[imageId] = calculateScore(*querySig, data, weights, queryMaps); 0505 } 0506 } 0507 0508 return scores; 0509 } 0510 0511 QImage HaarIface::loadQImage(const QString& filename) 0512 { 0513 // NOTE: Can be optimized using DImg. 0514 0515 QImage image; 0516 0517 if (JPEGUtils::isJpegImage(filename)) 0518 { 0519 // use fast jpeg loading 0520 0521 if (!JPEGUtils::loadJPEGScaled(image, filename, Haar::NumberOfPixels)) 0522 { 0523 // try QT now. 
0524 0525 if (!image.load(filename)) 0526 { 0527 return QImage(); 0528 } 0529 } 0530 } 0531 else 0532 { 0533 // use default QT image loading 0534 0535 if (!image.load(filename)) 0536 { 0537 return QImage(); 0538 } 0539 } 0540 0541 return image; 0542 } 0543 0544 bool HaarIface::retrieveSignatureFromDB(qlonglong imageid, Haar::SignatureData& sig) 0545 { 0546 QList<QVariant> values; 0547 SimilarityDbAccess().backend()->execSql(QString::fromUtf8("SELECT matrix FROM ImageHaarMatrix " 0548 " WHERE imageid=?;"), 0549 imageid, &values); 0550 0551 if (values.isEmpty()) 0552 { 0553 return false; 0554 } 0555 0556 DatabaseBlob blob; 0557 0558 blob.read(values.first().toByteArray(), sig); 0559 return true; 0560 } 0561 0562 void HaarIface::getBestAndWorstPossibleScore(Haar::SignatureData* const sig, 0563 SketchType type, 0564 double* const lowestAndBestScore, 0565 double* const highestAndWorstScore) 0566 { 0567 Haar::Weights weights(static_cast<Haar::Weights::SketchType>(type)); 0568 double score = 0; 0569 0570 // In the first step, the score is initialized with the weighted color channel averages. 0571 // We don't know the target channel average here, we only now its not negative => assume 0 0572 0573 for (int channel = 0 ; channel < 3 ; ++channel) 0574 { 0575 score += weights.weightForAverage(channel) * fabs(sig->avg[channel] /*- targetSig.avg[channel]*/); 0576 } 0577 0578 *highestAndWorstScore = score; 0579 0580 // Next consideration: The lowest possible score is reached if the signature is identical. 0581 // The first step (see above) will result in 0 - skip it. 0582 // In the second step, for every coefficient in the sig that have query and target in common, 0583 // so in our case all 3*40, subtract the specifically assigned weighting. 
0584 0585 score = 0; 0586 0587 for (int channel = 0 ; channel < 3 ; ++channel) 0588 { 0589 Haar::Idx* const coefs = sig->sig[channel]; 0590 0591 for (int coef = 0 ; coef < Haar::NumberOfCoefficients ; ++coef) 0592 { 0593 score -= weights.weight(d->weightBin.binAbs(coefs[coef]), channel); 0594 } 0595 } 0596 0597 *lowestAndBestScore = score; 0598 } 0599 0600 0601 QMap<QString, QString> HaarIface::writeSAlbumQueries(const DuplicatesResultsMap& searchResults) 0602 { 0603 // Build search XML from the results. Store list of ids of similar images. 0604 0605 QMap<QString, QString> queries; 0606 0607 for (auto it = searchResults.constBegin() ; it != searchResults.constEnd() ; ++it) 0608 { 0609 SearchXmlWriter writer; 0610 writer.writeGroup(); 0611 writer.writeField(QLatin1String("imageid"), SearchXml::OneOf); 0612 writer.writeValue(it->second); 0613 writer.finishField(); 0614 0615 // Add the average similarity as field 0616 0617 writer.writeField(QLatin1String("noeffect_avgsim"), SearchXml::Equal); 0618 writer.writeValue(it->first * 100); 0619 writer.finishField(); 0620 writer.finishGroup(); 0621 writer.finish(); 0622 0623 // Use the id of the first duplicate as name of the search 0624 0625 queries.insert(QString::number(it.key()), writer.xml()); 0626 } 0627 0628 return queries; 0629 } 0630 0631 void HaarIface::rebuildDuplicatesAlbums(const DuplicatesResultsMap& results, bool isAlbumUpdate) 0632 { 0633 // Build search XML from the results. Store list of ids of similar images. 0634 0635 QMap<QString, QString> queries = writeSAlbumQueries(results); 0636 0637 // Write the new search albums to the database. 0638 0639 CoreDbAccess access; 0640 CoreDbTransaction transaction(&access); 0641 0642 // Full rebuild: delete all old searches. 0643 0644 if (!isAlbumUpdate) 0645 { 0646 access.db()->deleteSearches(DatabaseSearch::DuplicatesSearch); 0647 } 0648 0649 // Create new groups, or update existing searches. 
0650 0651 for (QMap<QString, QString>::const_iterator it = queries.constBegin() ; 0652 it != queries.constEnd() ; ++it) 0653 { 0654 if (isAlbumUpdate) 0655 { 0656 access.db()->deleteSearch(it.key().toInt()); 0657 } 0658 0659 access.db()->addSearch(DatabaseSearch::DuplicatesSearch, it.key(), it.value()); 0660 } 0661 } 0662 0663 QSet<qlonglong> HaarIface::imagesFromAlbumsAndTags(const QList<int>& albums2Scan, 0664 const QList<int>& tags2Scan, 0665 AlbumTagRelation relation) 0666 { 0667 QSet<qlonglong> imagesFromAlbums; 0668 QSet<qlonglong> imagesFromTags; 0669 QSet<qlonglong> images; 0670 0671 // Get all items DB id from all albums and all collections 0672 0673 Q_FOREACH (int albumId, albums2Scan) 0674 { 0675 const auto list = CoreDbAccess().db()->getItemIDsInAlbum(albumId); 0676 imagesFromAlbums.unite(QSet<qlonglong>(list.begin(), list.end())); 0677 } 0678 0679 // Get all items DB id from all tags 0680 0681 Q_FOREACH (int albumId, tags2Scan) 0682 { 0683 const auto list = CoreDbAccess().db()->getItemIDsInTag(albumId); 0684 imagesFromTags.unite(QSet<qlonglong>(list.begin(), list.end())); 0685 } 0686 0687 switch (relation) 0688 { 0689 case Union: 0690 { 0691 // ({} UNION A) UNION T = A UNION T 0692 0693 images.unite(imagesFromAlbums).unite(imagesFromTags); 0694 break; 0695 } 0696 0697 case Intersection: 0698 { 0699 // ({} UNION A) INTERSECT T = A INTERSECT T 0700 0701 images.unite(imagesFromAlbums).intersect(imagesFromTags); 0702 break; 0703 } 0704 0705 case AlbumExclusive: 0706 { 0707 // ({} UNION A) = A 0708 0709 images.unite(imagesFromAlbums); 0710 0711 // (A INTERSECT T) = A' 0712 0713 imagesFromAlbums.intersect(imagesFromTags); 0714 0715 // A\A' = albums without tags 0716 0717 images.subtract(imagesFromAlbums); 0718 break; 0719 } 0720 0721 case TagExclusive: 0722 { 0723 // ({} UNION T) = TT 0724 0725 images.unite(imagesFromTags); 0726 0727 // (A INTERSECT T) = A' = T' 0728 0729 imagesFromAlbums.intersect(imagesFromTags); 0730 0731 // T\T' = tags without albums 
0732 0733 images.subtract(imagesFromAlbums); 0734 break; 0735 } 0736 0737 case NoMix: 0738 { 0739 if ((albums2Scan.isEmpty() && tags2Scan.isEmpty())) 0740 { 0741 qCWarning(DIGIKAM_GENERAL_LOG) << "Duplicates search: Both the albums and the tags " 0742 "list are non-empty but the album/tag relation " 0743 "stated a NoMix. Skipping duplicates search"; 0744 return {}; 0745 } 0746 else 0747 { 0748 // ({} UNION A) UNION T = A UNION T = A Xor T 0749 0750 images.unite(imagesFromAlbums).unite(imagesFromTags); 0751 } 0752 } 0753 } 0754 0755 return images; 0756 } 0757 0758 HaarIface::DuplicatesResultsMap HaarIface::findDuplicates(const QSet<qlonglong>& images2Scan, 0759 const QSet<qlonglong>::const_iterator& rangeBegin, 0760 const QSet<qlonglong>::const_iterator& rangeEnd, 0761 RefImageSelMethod refImageSelectionMethod, 0762 const QSet<qlonglong>& refs, 0763 double requiredPercentage, 0764 double maximumPercentage, 0765 DuplicatesSearchRestrictions searchResultRestriction, 0766 HaarProgressObserver* const observer) 0767 { 0768 static const QList<int> emptyTargetAlbums; 0769 DuplicatesResultsMap resultsMap; 0770 DuplicatesResultsMap::iterator resultsIterator; 0771 QSet<qlonglong>::const_iterator images2ScanIterator; 0772 QPair<double, QMap<qlonglong, double> > bestMatches; 0773 QList<qlonglong> duplicates; 0774 QSet<qlonglong> resultsCandidates; 0775 const bool singleThread = ((rangeBegin == images2Scan.constBegin()) && 0776 (rangeEnd == images2Scan.constEnd())); 0777 0778 // create signature cache map for fast lookup 0779 0780 if (!d->hasSignatureCache()) 0781 { 0782 d->rebuildSignatureCache(images2Scan); 0783 } 0784 0785 for (images2ScanIterator = rangeBegin ; images2ScanIterator != rangeEnd ; ++images2ScanIterator) 0786 { 0787 0788 #if ENABLE_DEBUG_DUPLICATES 0789 0790 { 0791 ItemInfo info(*images2ScanIterator); 0792 const QString path = info.filePath(); 0793 const QString name = info.name(); 0794 DEBUG_DUPLICATES("Iterate image: " << name << "Path: " << path); 0795 } 0796 
0797 #endif 0798 0799 if (observer && observer->isCanceled()) 0800 { 0801 break; 0802 } 0803 0804 if (!resultsCandidates.contains(*images2ScanIterator)) 0805 { 0806 // find images with required similarity 0807 0808 bestMatches = bestMatchesForImageWithThreshold(*images2ScanIterator, 0809 requiredPercentage, 0810 maximumPercentage, 0811 emptyTargetAlbums, 0812 searchResultRestriction, 0813 ScannedSketch); 0814 0815 // We need only the image ids from the best matches map. 0816 0817 duplicates = bestMatches.second.keys(); 0818 0819 // the list will usually contain one image: the original. Filter out. 0820 0821 if (!(duplicates.isEmpty()) && !((duplicates.count() == 1) && (duplicates.first() == *images2ScanIterator))) 0822 { 0823 DEBUG_DUPLICATES("\tHas duplicates"); 0824 0825 // Use the oldest image date or larger pixel/file size as the reference image. 0826 // Or if the image is in the refImage list 0827 0828 QDateTime refDateTime; 0829 QDateTime refModDateTime; 0830 quint64 refPixelSize = 0; 0831 qlonglong refFileSize = 0; 0832 qlonglong reference = *images2ScanIterator; 0833 0834 const bool useReferenceImages = ((refImageSelectionMethod == RefImageSelMethod::PreferFolder) || 0835 (refImageSelectionMethod == RefImageSelMethod::ExcludeFolder)); 0836 0837 bool referenceFound = false; 0838 0839 if (useReferenceImages) 0840 { 0841 for (auto it = refs.begin() ; it != refs.end() ; ++it) 0842 { 0843 0844 #if ENABLE_DEBUG_DUPLICATES 0845 0846 { 0847 ItemInfo info(*it); 0848 const QString path = info.filePath(); 0849 const QString name = info.name(); 0850 DEBUG_DUPLICATES("\tReference image: " << name << "Path: " << path << ", Id: " << info.id()); 0851 } 0852 0853 #endif 0854 0855 if (*it == *images2ScanIterator) 0856 { 0857 // image of images2ScanIterator is already in the references present, so take it as the 0858 // reference 0859 0860 DEBUG_DUPLICATES("\tReference found!"); 0861 referenceFound = true; 0862 break; 0863 } 0864 } 0865 } 0866 0867 0868 if 
(!useReferenceImages || 0869 (!referenceFound && (refImageSelectionMethod == RefImageSelMethod::PreferFolder)) || 0870 (referenceFound && (refImageSelectionMethod == RefImageSelMethod::ExcludeFolder))) 0871 { 0872 DEBUG_DUPLICATES("\tChecking Duplicates") 0873 0874 Q_FOREACH (const qlonglong& refId, duplicates) 0875 { 0876 0877 #if ENABLE_DEBUG_DUPLICATES 0878 0879 { 0880 ItemInfo info(refId); 0881 const QString path = info.filePath(); 0882 const QString name = info.name(); 0883 DEBUG_DUPLICATES("\t\tDuplicates: " << name << "Path: " << path << ", Id: " << info.id()); 0884 } 0885 0886 #endif 0887 0888 ItemInfo info(refId); 0889 quint64 infoPixelSize = (quint64)info.dimensions().width() * 0890 (quint64)info.dimensions().height(); 0891 0892 bool referenceFound = false; 0893 0894 if (useReferenceImages) 0895 { 0896 for (auto it = refs.begin() ; it != refs.end() ; ++it) 0897 { 0898 0899 #if ENABLE_DEBUG_DUPLICATES 0900 0901 { 0902 ItemInfo info(*it); 0903 const QString path = info.filePath(); 0904 const QString name = info.name(); 0905 DEBUG_DUPLICATES("\t\tReference image: " << name << "Path: " << path << ", Id: " << info.id()); 0906 } 0907 0908 #endif 0909 0910 if (*it == refId) 0911 { 0912 DEBUG_DUPLICATES("\t\tReference found!"); 0913 referenceFound = true; 0914 break; 0915 } 0916 } 0917 } 0918 0919 const bool preferFolderCond = (referenceFound && (refImageSelectionMethod == RefImageSelMethod::PreferFolder)); 0920 0921 const bool excludeFolderCond = (!referenceFound && (refImageSelectionMethod == RefImageSelMethod::ExcludeFolder)); 0922 0923 const bool newerCreationCond = ((refImageSelectionMethod == RefImageSelMethod::NewerCreationDate) && 0924 (!refDateTime.isValid() || (info.dateTime() > refDateTime))); 0925 0926 const bool newerModCond = ((refImageSelectionMethod == RefImageSelMethod::NewerModificationDate) && 0927 (!refModDateTime.isValid() || (info.modDateTime() > refModDateTime))); 0928 0929 const bool olderOrLargerCond = ((refImageSelectionMethod == 
RefImageSelMethod::OlderOrLarger) && 0930 (!refDateTime.isValid() || 0931 (infoPixelSize > refPixelSize) || 0932 ((infoPixelSize == refPixelSize) && (info.fileSize() > refFileSize)) || 0933 ((infoPixelSize == refPixelSize) && (info.fileSize() == refFileSize) && 0934 (info.dateTime() < refDateTime)))); 0935 0936 if (preferFolderCond || excludeFolderCond || newerCreationCond || newerModCond || olderOrLargerCond) 0937 { 0938 reference = refId; 0939 refDateTime = info.dateTime(); 0940 refModDateTime = info.modDateTime(); 0941 refFileSize = info.fileSize(); 0942 refPixelSize = infoPixelSize; 0943 0944 #if ENABLE_DEBUG_DUPLICATES 0945 0946 { 0947 const QString path = info.filePath(); 0948 const QString name = info.name(); 0949 DEBUG_DUPLICATES("\t\tUse as eference image: " << name << "Path: " << path << ", Id: " << info.id() << "Pixelsize: " << infoPixelSize << ", File size: " << refFileSize << ", Datetime: " << refDateTime); 0950 } 0951 0952 #endif 0953 0954 if (preferFolderCond || excludeFolderCond) 0955 { 0956 break; 0957 } 0958 } 0959 } 0960 } 0961 0962 resultsMap.insert(reference, qMakePair(bestMatches.first, duplicates)); 0963 0964 resultsCandidates << *images2ScanIterator; 0965 resultsCandidates.unite(QSet<qlonglong>(duplicates.begin(), duplicates.end())); 0966 } 0967 } 0968 0969 // if an imageid is not a results candidate, remove it 0970 // from the cached signature map as well, 0971 // to greatly improve speed 0972 0973 if (singleThread && !resultsCandidates.contains(*images2ScanIterator)) 0974 { 0975 d->signatureCache()->remove(*images2ScanIterator); 0976 } 0977 0978 if (observer) 0979 { 0980 observer->imageProcessed(); 0981 } 0982 } 0983 #if ENABLE_DEBUG_DUPLICATES 0984 0985 DEBUG_DUPLICATES("Results:"); 0986 0987 for (auto i = resultsMap.constBegin() ; i != resultsMap.constEnd() ; ++i) 0988 { 0989 ItemInfo info(i.key()); 0990 const QString path = info.filePath(); 0991 const QString name = info.name(); 0992 DEBUG_DUPLICATES("\t\tReference image: " << name << 
"Path: " << path << ", Id: " << info.id()); 0993 } 0994 0995 #endif 0996 0997 return resultsMap; 0998 } 0999 1000 double HaarIface::calculateScore(const Haar::SignatureData& querySig, 1001 const Haar::SignatureData& targetSig, 1002 const Haar::Weights& weights, 1003 std::reference_wrapper<Haar::SignatureMap>* const queryMaps) 1004 { 1005 double score = 0.0; 1006 1007 // Step 1: Initialize scores with average intensity values of all three channels 1008 1009 for (int channel = 0 ; channel < 3 ; ++channel) 1010 { 1011 score += weights.weightForAverage(channel) * fabs(querySig.avg[channel] - targetSig.avg[channel]); 1012 } 1013 1014 // Step 2: Decrease the score if query and target have significant coefficients in common 1015 1016 int x = 0; 1017 1018 for (int channel = 0 ; channel < 3 ; ++channel) 1019 { 1020 const Haar::SignatureMap& queryMap = queryMaps[channel]; 1021 1022 for (int coef = 0 ; coef < Haar::NumberOfCoefficients ; ++coef) 1023 { 1024 // x is a pixel index, either positive or negative, 0..16384 1025 1026 x = targetSig.sig[channel][coef]; 1027 1028 // If x is a significant coefficient with the same sign in the query signature as well, 1029 // decrease the score (lower is better) 1030 // Note: both method calls called with x accept positive or negative values 1031 1032 if ((queryMap)[x]) 1033 { 1034 score -= weights.weight(d->weightBin.binAbs(x), channel); 1035 } 1036 } 1037 } 1038 1039 return score; 1040 } 1041 1042 } // namespace Digikam