File indexing completed on 2025-01-05 03:54:03

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam project
0004  * https://www.digikam.org
0005  *
0006  * Date        : 2003-01-17
0007  * Description : Haar Database interface
0008  *
0009  * SPDX-FileCopyrightText: 2016-2018 by Mario Frank <mario dot frank at uni minus potsdam dot de>
0010  * SPDX-FileCopyrightText: 2003      by Ricardo Niederberger Cabral <nieder at mail dot ru>
0011  * SPDX-FileCopyrightText: 2009-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0012  * SPDX-FileCopyrightText: 2009-2013 by Marcel Wiesweg <marcel dot wiesweg at gmx dot de>
0013  * SPDX-FileCopyrightText: 2009-2011 by Andi Clemens <andi dot clemens at gmail dot com>
0014  *
0015  * SPDX-License-Identifier: GPL-2.0-or-later
0016  *
0017  * ============================================================ */
0018 
0019 #include "haariface_p.h"
0020 
// Compile-time switch for verbose tracing of the duplicates search.
// With ENABLE_DEBUG_DUPLICATES set to a non-zero value, DEBUG_DUPLICATES(x) streams x
// to qCDebug(DIGIKAM_DATABASE_LOG); with 0 the macro expands to nothing.
// NOTE: the enabled expansion already ends with a semicolon.

#define ENABLE_DEBUG_DUPLICATES 0
#if ENABLE_DEBUG_DUPLICATES
#   define DEBUG_DUPLICATES(x) qCDebug(DIGIKAM_DATABASE_LOG) << x;
#else
#   define DEBUG_DUPLICATES(x)
#endif
0027 
0028 namespace Digikam
0029 {
0030 
0031 HaarIface::HaarIface()
0032     : d(new Private())
0033 {
0034     qRegisterMetaType<DuplicatesResultsMap>("HaarIface::DuplicatesResultsMap");
0035 }
0036 
0037 HaarIface::HaarIface(const QSet<qlonglong>& images2Scan)
0038     : HaarIface()
0039 {
0040     d->rebuildSignatureCache(images2Scan);
0041 }
0042 
0043 HaarIface::~HaarIface()
0044 {
0045     delete d;
0046 }
0047 
0048 void HaarIface::setAlbumRootsToSearch(const QList<int>& albumRootIds)
0049 {
0050     setAlbumRootsToSearch(QSet<int>(albumRootIds.begin(), albumRootIds.end()));
0051 }
0052 
0053 void HaarIface::setAlbumRootsToSearch(const QSet<int>& albumRootIds)
0054 {
0055     d->setAlbumRootsToSearch(albumRootIds);
0056 }
0057 
0058 int HaarIface::preferredSize()
0059 {
0060     return Haar::NumberOfPixels;
0061 }
0062 
0063 bool HaarIface::indexImage(const QString& filename)
0064 {
0065     QImage image = loadQImage(filename);
0066 
0067     if (image.isNull())
0068     {
0069         return false;
0070     }
0071 
0072     return indexImage(filename, image);
0073 }
0074 
0075 bool HaarIface::indexImage(const QString& filename, const QImage& image)
0076 {
0077     ItemInfo info = ItemInfo::fromLocalFile(filename);
0078 
0079     if (info.isNull())
0080     {
0081         return false;
0082     }
0083 
0084     return indexImage(info.id(), image);
0085 }
0086 
0087 bool HaarIface::indexImage(const QString& filename, const DImg& image)
0088 {
0089     ItemInfo info = ItemInfo::fromLocalFile(filename);
0090 
0091     if (info.isNull())
0092     {
0093         return false;
0094     }
0095 
0096     return indexImage(info.id(), image);
0097 }
0098 
0099 bool HaarIface::indexImage(qlonglong imageid, const QImage& image)
0100 {
0101     if (image.isNull())
0102     {
0103         return false;
0104     }
0105 
0106     d->setImageDataFromImage(image);
0107 
0108     return indexImage(imageid);
0109 }
0110 
0111 bool HaarIface::indexImage(qlonglong imageid, const DImg& image)
0112 {
0113     if (image.isNull())
0114     {
0115         return false;
0116     }
0117 
0118     d->setImageDataFromImage(image);
0119 
0120     return indexImage(imageid);
0121 }
0122 
0123 // NOTE: private method: d->m_data has been filled
0124 
0125 bool HaarIface::indexImage(qlonglong imageid)
0126 {
0127     Haar::Calculator haar;
0128     haar.transform(d->imageData());
0129 
0130     Haar::SignatureData sig;
0131     haar.calcHaar(d->imageData(), &sig);
0132 
0133     // Store main entry
0134 
0135     DatabaseBlob blob;
0136     QByteArray array = blob.write(sig);
0137 
0138     ItemInfo info(imageid);
0139 
0140     if (!info.isNull() && info.isVisible())
0141     {
0142         SimilarityDbAccess().backend()->execSql(QString::fromUtf8("REPLACE INTO ImageHaarMatrix "
0143                                                                   " (imageid, modificationDate, uniqueHash, matrix) "
0144                                                                   " VALUES(?, ?, ?, ?);"),
0145                                                 imageid, info.modDateTime(), info.uniqueHash(), array);
0146     }
0147 
0148     return true;
0149 }
0150 
0151 QString HaarIface::signatureAsText(const QImage& image)
0152 {
0153     d->setImageDataFromImage(image);
0154 
0155     Haar::Calculator haar;
0156     haar.transform(d->imageData());
0157     Haar::SignatureData sig;
0158     haar.calcHaar(d->imageData(), &sig);
0159 
0160     DatabaseBlob blob;
0161     QByteArray array = blob.write(sig);
0162 
0163     return QString::fromUtf8(array.toBase64());
0164 }
0165 
0166 QPair<double, QMap<qlonglong, double> > HaarIface::bestMatchesForImageWithThreshold(const QString& imagePath,
0167                                                                                     double requiredPercentage,
0168                                                                                     double maximumPercentage,
0169                                                                                     const QList<int>& targetAlbums,
0170                                                                                         DuplicatesSearchRestrictions
0171                                                                                     searchResultRestriction,
0172                                                                                     SketchType type)
0173 {
0174     DImg image(imagePath);
0175 
0176     if (image.isNull())
0177     {
0178         return QPair<double, QMap<qlonglong, double> >();
0179     }
0180 
0181     d->setImageDataFromImage(image);
0182 
0183     Haar::Calculator haar;
0184     haar.transform(d->imageData());
0185     Haar::SignatureData sig;
0186     haar.calcHaar(d->imageData(), &sig);
0187 
0188     // Remove all previous similarities from pictures
0189 
0190     SimilarityDbAccess().db()->removeImageSimilarity(0);
0191 
0192     // Apply duplicates search for the image. Use the image id 0 which cannot be present.
0193 
0194     return bestMatchesWithThreshold(0,
0195                                     &sig,
0196                                     requiredPercentage,
0197                                     maximumPercentage,
0198                                     targetAlbums,
0199                                     searchResultRestriction,
0200                                     type);
0201 }
0202 
0203 QPair<double, QMap<qlonglong, double> > HaarIface::bestMatchesForImageWithThreshold(qlonglong imageId,
0204                                                                                     double requiredPercentage,
0205                                                                                     double maximumPercentage,
0206                                                                                     const QList<int>& targetAlbums,
0207                                                                                         DuplicatesSearchRestrictions
0208                                                                                     searchResultRestriction,
0209                                                                                     SketchType type)
0210 {
0211     Haar::SignatureData sig;
0212 
0213     if (d->hasSignatureCache())
0214     {
0215         if (!d->retrieveSignatureFromCache(imageId, sig))
0216         {
0217             return {};
0218         }
0219     }
0220     else
0221     {
0222         if (!retrieveSignatureFromDB(imageId, sig))
0223         {
0224             return {};
0225         }
0226     }
0227 
0228     return bestMatchesWithThreshold(imageId,
0229                                     &sig,
0230                                     requiredPercentage,
0231                                     maximumPercentage,
0232                                     targetAlbums,
0233                                     searchResultRestriction,
0234                                     type);
0235 }
0236 
0237 QMap<qlonglong, double> HaarIface::bestMatchesForSignature(const QString& signature,
0238                                                            const QList<int>& targetAlbums,
0239                                                            int numberOfResults,
0240                                                            SketchType type)
0241 {
0242     QByteArray bytes = QByteArray::fromBase64(signature.toLatin1());
0243 
0244     DatabaseBlob blobReader;
0245     Haar::SignatureData sig;
0246     blobReader.read(bytes, sig);
0247 
0248     // Get all matching images with their score and save their similarity to the signature, i.e. id -2
0249 
0250     QMultiMap<double, qlonglong> matches = bestMatches(&sig, numberOfResults, targetAlbums, type);
0251     QMap<qlonglong, double> result;
0252 
0253     for (QMultiMap<double, qlonglong>::const_iterator it = matches.constBegin() ;
0254          it != matches.constEnd() ; ++it)
0255     {
0256         // Add the image id and the normalised score (make sure that it is positive and between 0 and 1.
0257 
0258         result.insert(it.value(), (0.0 - (it.key() / 100)));
0259     }
0260 
0261     return result;
0262 }
0263 
0264 QMultiMap<double, qlonglong> HaarIface::bestMatches(Haar::SignatureData* const querySig,
0265                                                     int numberOfResults,
0266                                                     const QList<int>& targetAlbums,
0267                                                     SketchType type)
0268 {
0269     QMap<qlonglong, double> scores = searchDatabase(querySig, type, targetAlbums);
0270 
0271     // Find out the best matches, those with the lowest score
0272     // We make use of the feature that QMap keys are sorted in ascending order
0273     // Of course, images can have the same score, so we need a multi map
0274 
0275     QMultiMap<double, qlonglong> bestMatches;
0276     bool                         initialFill = false;
0277     double                       score, worstScore, bestScore;
0278     qlonglong                    id;
0279 
0280     for (QMap<qlonglong, double>::const_iterator it = scores.constBegin() ;
0281          it != scores.constEnd() ; ++it)
0282     {
0283         score = it.value();
0284         id    = it.key();
0285 
0286         if (!initialFill)
0287         {
0288             // as long as the maximum number of results is not reached, just fill up the map
0289 
0290             bestMatches.insert(score, id);
0291             initialFill = (bestMatches.size() >= numberOfResults);
0292         }
0293         else
0294         {
0295             // find the last entry, the one with the highest (=worst) score
0296 
0297             QMultiMap<double, qlonglong>::iterator last = bestMatches.end();
0298             --last;
0299             worstScore = last.key();
0300 
0301             // if the new entry has a higher score, put it in the list and remove that last one
0302 
0303             if (score < worstScore)
0304             {
0305                 bestMatches.erase(last);
0306                 bestMatches.insert(score, id);
0307             }
0308             else if (score == worstScore)
0309             {
0310                 bestScore = bestMatches.begin().key();
0311 
0312                 // if the score is identical for all entries, increase the maximum result number
0313 
0314                 if (score == bestScore)
0315                 {
0316                     bestMatches.insert(score, id);
0317                 }
0318             }
0319         }
0320     }
0321 
0322 /*
0323     for (QMap<double, qlonglong>::iterator it = bestMatches.begin(); it != bestMatches.end(); ++it)
0324     {
0325         qCDebug(DIGIKAM_DATABASE_LOG) << it.key() << it.value();
0326     }
0327 */
0328 
0329     return bestMatches;
0330 }
0331 
0332 QPair<double, QMap<qlonglong, double> > HaarIface::bestMatchesWithThreshold(qlonglong imageid,
0333                                                                             Haar::SignatureData* const querySig,
0334                                                                             double requiredPercentage,
0335                                                                             double maximumPercentage,
0336                                                                             const QList<int>& targetAlbums,
0337                                                                                 DuplicatesSearchRestrictions
0338                                                                             searchResultRestriction,
0339                                                                             SketchType type)
0340 {
0341     int albumId                    = CoreDbAccess().db()->getItemAlbum(imageid);
0342     QMap<qlonglong, double> scores = searchDatabase(querySig,
0343                                                     type,
0344                                                     targetAlbums,
0345                                                     searchResultRestriction,
0346                                                     imageid,
0347                                                     albumId);
0348     double lowest, highest;
0349     getBestAndWorstPossibleScore(querySig, type, &lowest, &highest);
0350 
0351     // The range between the highest (worst) and lowest (best) score
0352     // example: 0.2 and 0.5 -> 0.3
0353 
0354     double scoreRange      = highest - lowest;
0355 
0356     // The lower the requiredPercentage is, the higher will the result be.
0357     // example: 0.7 -> 0.3
0358 
0359     double percentageRange = 1.0 - requiredPercentage;
0360 
0361     // example: 0.2 + (0.3 * 0.3) = 0.2 + 0.09 = 0.29
0362 
0363     double requiredScore   = lowest + scoreRange * percentageRange;
0364 
0365     // Set the supremum which solves the problem that if
0366     // required == maximum, no results will be returned.
0367     // Eg, id required == maximum == 50.0, only images with exactly this
0368     // similarity are returned. But users expect also to see images
0369     // with similarity 50,x.
0370 
0371     double supremum = (floor(maximumPercentage * 100 + 1.0)) / 100;
0372 
0373     QMap<qlonglong, double> bestMatches;
0374     double score, percentage, avgPercentage = 0.0;
0375     QPair<double, QMap<qlonglong, double> > result;
0376     qlonglong id;
0377 
0378     for (QMap<qlonglong, double>::const_iterator it = scores.constBegin() ;
0379          it != scores.constEnd() ; ++it)
0380     {
0381         score = it.value();
0382         id    = it.key();
0383 
0384         // If the score of the picture is at most the required (maximum) score and
0385 
0386         if (score <= requiredScore)
0387         {
0388             percentage = 1.0 - (score - lowest) / scoreRange;
0389 
0390             // If the found image is the original one (check by id) or the percentage is below the maximum.
0391 
0392             if ((id == imageid) || (percentage < supremum))
0393             {
0394                 bestMatches.insert(id, percentage);
0395 
0396                 // If the current image is not the original, use the images similarity for the average percentage
0397                 // Also, save the similarity of the found image to the original image.
0398 
0399                 if (id != imageid)
0400                 {
0401                     // Store the similarity if the reference image has a valid image id
0402 
0403                     if (imageid > 0)
0404                     {
0405                         SimilarityDbAccess().db()->setImageSimilarity(id, imageid, percentage);
0406                     }
0407 
0408                     avgPercentage += percentage;
0409                 }
0410             }
0411         }
0412     }
0413 
0414     // Debug output
0415 
0416     if (bestMatches.count() > 1)
0417     {
0418         // The average percentage is the sum of all percentages
0419         // (without the original picture) divided by the count of pictures -1.
0420         // Subtracting 1 is necessary since the original picture is not used for the calculation.
0421 
0422         avgPercentage = avgPercentage / (bestMatches.count() - 1);
0423 
0424         qCDebug(DIGIKAM_DATABASE_LOG) << "Duplicates with id and score:";
0425 
0426         for (QMap<qlonglong, double>::const_iterator it = bestMatches.constBegin() ; it != bestMatches.constEnd() ; ++it)
0427         {
0428             qCDebug(DIGIKAM_DATABASE_LOG) << it.key() << QString::number(it.value() * 100) + QLatin1Char('%');
0429         }
0430     }
0431 
0432     result.first  = avgPercentage;
0433     result.second = bestMatches;
0434 
0435     return result;
0436 }
0437 
0438 bool HaarIface::fulfillsRestrictions(qlonglong imageId, int albumId,
0439                                      qlonglong originalImageId,
0440                                      int originalAlbumId,
0441                                      const QList<int>& targetAlbums,
0442                                      DuplicatesSearchRestrictions searchResultRestriction)
0443 {
0444     if      (imageId == originalImageId)
0445     {
0446         return true;
0447     }
0448     else if (targetAlbums.isEmpty() || targetAlbums.contains(albumId))
0449     {
0450         return ( searchResultRestriction == None)                                            ||
0451                ((searchResultRestriction == SameAlbum)      && (originalAlbumId == albumId)) ||
0452                ((searchResultRestriction == DifferentAlbum) && (originalAlbumId != albumId));
0453     }
0454     else
0455     {
0456         return false;
0457     }
0458 }
0459 
0460 QMap<qlonglong, double> HaarIface::searchDatabase(Haar::SignatureData* const querySig,
0461                                                   SketchType type, const QList<int>& targetAlbums,
0462                                                   DuplicatesSearchRestrictions searchResultRestriction,
0463                                                   qlonglong originalImageId,
0464                                                   int originalAlbumId)
0465 {
0466     // The table of constant weight factors applied to each channel and the weight bin
0467 
0468     Haar::Weights weights((Haar::Weights::SketchType)type);
0469 
0470     // layout the query signature for fast lookup
0471 
0472     Haar::SignatureMap queryMapY, queryMapI, queryMapQ;
0473     queryMapY.fill(querySig->sig[0]);
0474     queryMapI.fill(querySig->sig[1]);
0475     queryMapQ.fill(querySig->sig[2]);
0476     std::reference_wrapper<Haar::SignatureMap> queryMaps[3] = { queryMapY, queryMapI, queryMapQ };
0477 
0478     // Map imageid -> score. Lowest score is best.
0479     // any newly inserted value will be initialized with a score of 0, as required
0480 
0481     QMap<qlonglong, double> scores;
0482 
0483     // if no cache is used or the cache signature map is empty, query the database
0484 
0485     if (!d->hasSignatureCache())
0486     {
0487         d->rebuildSignatureCache();
0488     }
0489 
0490     for (auto it = d->signatureCache()->constBegin() ; it != d->signatureCache()->constEnd() ; ++it)
0491     {
0492         // If the image is the original one or
0493         // No restrictions apply or
0494         // SameAlbum restriction applies and the albums are equal or
0495         // DifferentAlbum restriction applies and the albums differ
0496         // then calculate the score.
0497 
0498         const qlonglong& imageId = it.key();
0499 
0500         if (fulfillsRestrictions(imageId, d->albumCache()->value(imageId), originalImageId,
0501                                  originalAlbumId, targetAlbums, searchResultRestriction))
0502         {
0503             const Haar::SignatureData& data = it.value();
0504             scores[imageId]                 = calculateScore(*querySig, data, weights, queryMaps);
0505         }
0506     }
0507 
0508     return scores;
0509 }
0510 
0511 QImage HaarIface::loadQImage(const QString& filename)
0512 {
0513     // NOTE: Can be optimized using DImg.
0514 
0515     QImage image;
0516 
0517     if (JPEGUtils::isJpegImage(filename))
0518     {
0519         // use fast jpeg loading
0520 
0521         if (!JPEGUtils::loadJPEGScaled(image, filename, Haar::NumberOfPixels))
0522         {
0523             // try QT now.
0524 
0525             if (!image.load(filename))
0526             {
0527                 return QImage();
0528             }
0529         }
0530     }
0531     else
0532     {
0533         // use default QT image loading
0534 
0535         if (!image.load(filename))
0536         {
0537             return QImage();
0538         }
0539     }
0540 
0541     return image;
0542 }
0543 
0544 bool HaarIface::retrieveSignatureFromDB(qlonglong imageid, Haar::SignatureData& sig)
0545 {
0546     QList<QVariant> values;
0547     SimilarityDbAccess().backend()->execSql(QString::fromUtf8("SELECT matrix FROM ImageHaarMatrix "
0548                                                               " WHERE imageid=?;"),
0549                                             imageid, &values);
0550 
0551     if (values.isEmpty())
0552     {
0553         return false;
0554     }
0555 
0556     DatabaseBlob blob;
0557 
0558     blob.read(values.first().toByteArray(), sig);
0559     return true;
0560 }
0561 
0562 void HaarIface::getBestAndWorstPossibleScore(Haar::SignatureData* const sig,
0563                                              SketchType type,
0564                                              double* const lowestAndBestScore,
0565                                              double* const highestAndWorstScore)
0566 {
0567     Haar::Weights weights(static_cast<Haar::Weights::SketchType>(type));
0568     double score = 0;
0569 
0570     // In the first step, the score is initialized with the weighted color channel averages.
0571     // We don't know the target channel average here, we only now its not negative => assume 0
0572 
0573     for (int channel = 0 ; channel < 3 ; ++channel)
0574     {
0575         score += weights.weightForAverage(channel) * fabs(sig->avg[channel] /*- targetSig.avg[channel]*/);
0576     }
0577 
0578     *highestAndWorstScore = score;
0579 
0580     // Next consideration: The lowest possible score is reached if the signature is identical.
0581     // The first step (see above) will result in 0 - skip it.
0582     // In the second step, for every coefficient in the sig that have query and target in common,
0583     // so in our case all 3*40, subtract the specifically assigned weighting.
0584 
0585     score = 0;
0586 
0587     for (int channel = 0 ; channel < 3 ; ++channel)
0588     {
0589         Haar::Idx* const coefs = sig->sig[channel];
0590 
0591         for (int coef = 0 ; coef < Haar::NumberOfCoefficients ; ++coef)
0592         {
0593             score -= weights.weight(d->weightBin.binAbs(coefs[coef]), channel);
0594         }
0595     }
0596 
0597     *lowestAndBestScore = score;
0598 }
0599 
0600 
0601 QMap<QString, QString> HaarIface::writeSAlbumQueries(const DuplicatesResultsMap& searchResults)
0602 {
0603     // Build search XML from the results. Store list of ids of similar images.
0604 
0605     QMap<QString, QString> queries;
0606 
0607     for (auto it = searchResults.constBegin() ; it != searchResults.constEnd() ; ++it)
0608     {
0609         SearchXmlWriter writer;
0610         writer.writeGroup();
0611         writer.writeField(QLatin1String("imageid"), SearchXml::OneOf);
0612         writer.writeValue(it->second);
0613         writer.finishField();
0614 
0615         // Add the average similarity as field
0616 
0617         writer.writeField(QLatin1String("noeffect_avgsim"), SearchXml::Equal);
0618         writer.writeValue(it->first * 100);
0619         writer.finishField();
0620         writer.finishGroup();
0621         writer.finish();
0622 
0623         // Use the id of the first duplicate as name of the search
0624 
0625         queries.insert(QString::number(it.key()), writer.xml());
0626     }
0627 
0628     return queries;
0629 }
0630 
0631 void HaarIface::rebuildDuplicatesAlbums(const DuplicatesResultsMap& results, bool isAlbumUpdate)
0632 {
0633     // Build search XML from the results. Store list of ids of similar images.
0634 
0635     QMap<QString, QString> queries = writeSAlbumQueries(results);
0636 
0637     // Write the new search albums to the database.
0638 
0639     CoreDbAccess access;
0640     CoreDbTransaction transaction(&access);
0641 
0642     // Full rebuild: delete all old searches.
0643 
0644     if (!isAlbumUpdate)
0645     {
0646         access.db()->deleteSearches(DatabaseSearch::DuplicatesSearch);
0647     }
0648 
0649     // Create new groups, or update existing searches.
0650 
0651     for (QMap<QString, QString>::const_iterator it = queries.constBegin() ;
0652          it != queries.constEnd() ; ++it)
0653     {
0654         if (isAlbumUpdate)
0655         {
0656             access.db()->deleteSearch(it.key().toInt());
0657         }
0658 
0659         access.db()->addSearch(DatabaseSearch::DuplicatesSearch, it.key(), it.value());
0660     }
0661 }
0662 
0663 QSet<qlonglong> HaarIface::imagesFromAlbumsAndTags(const QList<int>& albums2Scan,
0664                                                    const QList<int>& tags2Scan,
0665                                                    AlbumTagRelation relation)
0666 {
0667     QSet<qlonglong> imagesFromAlbums;
0668     QSet<qlonglong> imagesFromTags;
0669     QSet<qlonglong> images;
0670 
0671     // Get all items DB id from all albums and all collections
0672 
0673     Q_FOREACH (int albumId, albums2Scan)
0674     {
0675         const auto list = CoreDbAccess().db()->getItemIDsInAlbum(albumId);
0676         imagesFromAlbums.unite(QSet<qlonglong>(list.begin(), list.end()));
0677     }
0678 
0679     // Get all items DB id from all tags
0680 
0681     Q_FOREACH (int albumId, tags2Scan)
0682     {
0683         const auto list = CoreDbAccess().db()->getItemIDsInTag(albumId);
0684         imagesFromTags.unite(QSet<qlonglong>(list.begin(), list.end()));
0685     }
0686 
0687     switch (relation)
0688     {
0689         case Union:
0690         {
0691             // ({} UNION A) UNION T = A UNION T
0692 
0693             images.unite(imagesFromAlbums).unite(imagesFromTags);
0694             break;
0695         }
0696 
0697         case Intersection:
0698         {
0699             // ({} UNION A) INTERSECT T = A INTERSECT T
0700 
0701             images.unite(imagesFromAlbums).intersect(imagesFromTags);
0702             break;
0703         }
0704 
0705         case AlbumExclusive:
0706         {
0707             // ({} UNION A) = A
0708 
0709             images.unite(imagesFromAlbums);
0710 
0711             // (A INTERSECT T) = A'
0712 
0713             imagesFromAlbums.intersect(imagesFromTags);
0714 
0715             // A\A' = albums without tags
0716 
0717             images.subtract(imagesFromAlbums);
0718             break;
0719         }
0720 
0721         case TagExclusive:
0722         {
0723             // ({} UNION T) = TT
0724 
0725             images.unite(imagesFromTags);
0726 
0727             // (A INTERSECT T) = A' = T'
0728 
0729             imagesFromAlbums.intersect(imagesFromTags);
0730 
0731             // T\T' = tags without albums
0732 
0733             images.subtract(imagesFromAlbums);
0734             break;
0735         }
0736 
0737         case NoMix:
0738         {
0739             if ((albums2Scan.isEmpty() && tags2Scan.isEmpty()))
0740             {
0741                 qCWarning(DIGIKAM_GENERAL_LOG) << "Duplicates search: Both the albums and the tags "
0742                                                   "list are non-empty but the album/tag relation "
0743                                                   "stated a NoMix. Skipping duplicates search";
0744                 return {};
0745             }
0746             else
0747             {
0748                 // ({} UNION A) UNION T = A UNION T = A Xor T
0749 
0750                 images.unite(imagesFromAlbums).unite(imagesFromTags);
0751             }
0752         }
0753     }
0754 
0755     return images;
0756 }
0757 
0758 HaarIface::DuplicatesResultsMap HaarIface::findDuplicates(const QSet<qlonglong>& images2Scan,
0759                                                           const QSet<qlonglong>::const_iterator& rangeBegin,
0760                                                           const QSet<qlonglong>::const_iterator& rangeEnd,
0761                                                           RefImageSelMethod refImageSelectionMethod,
0762                                                           const QSet<qlonglong>& refs,
0763                                                           double requiredPercentage,
0764                                                           double maximumPercentage,
0765                                                           DuplicatesSearchRestrictions searchResultRestriction,
0766                                                           HaarProgressObserver* const observer)
0767 {
0768     static const QList<int>                 emptyTargetAlbums;
0769     DuplicatesResultsMap                    resultsMap;
0770     DuplicatesResultsMap::iterator          resultsIterator;
0771     QSet<qlonglong>::const_iterator         images2ScanIterator;
0772     QPair<double, QMap<qlonglong, double> > bestMatches;
0773     QList<qlonglong>                        duplicates;
0774     QSet<qlonglong>                         resultsCandidates;
0775     const bool                              singleThread = ((rangeBegin == images2Scan.constBegin()) &&
0776                                                             (rangeEnd   == images2Scan.constEnd()));
0777 
0778     // create signature cache map for fast lookup
0779 
0780     if (!d->hasSignatureCache())
0781     {
0782         d->rebuildSignatureCache(images2Scan);
0783     }
0784 
0785     for (images2ScanIterator = rangeBegin ; images2ScanIterator != rangeEnd ; ++images2ScanIterator)
0786     {
0787 
0788 #if ENABLE_DEBUG_DUPLICATES
0789 
0790         {
0791             ItemInfo info(*images2ScanIterator);
0792             const QString path = info.filePath();
0793             const QString name = info.name();
0794             DEBUG_DUPLICATES("Iterate image: " << name << "Path: " << path);
0795         }
0796 
0797 #endif
0798 
0799         if (observer && observer->isCanceled())
0800         {
0801             break;
0802         }
0803 
0804         if (!resultsCandidates.contains(*images2ScanIterator))
0805         {
0806             // find images with required similarity
0807 
0808             bestMatches = bestMatchesForImageWithThreshold(*images2ScanIterator,
0809                                                            requiredPercentage,
0810                                                            maximumPercentage,
0811                                                            emptyTargetAlbums,
0812                                                            searchResultRestriction,
0813                                                            ScannedSketch);
0814 
0815             // We need only the image ids from the best matches map.
0816 
0817             duplicates = bestMatches.second.keys();
0818 
0819             // the list will usually contain one image: the original. Filter out.
0820 
0821             if (!(duplicates.isEmpty()) && !((duplicates.count() == 1) && (duplicates.first() == *images2ScanIterator)))
0822             {
0823                 DEBUG_DUPLICATES("\tHas duplicates");
0824 
0825                 // Use the oldest image date or larger pixel/file size as the reference image.
0826                 // Or if the image is in the refImage list
0827 
0828                 QDateTime refDateTime;
0829                 QDateTime refModDateTime;
0830                 quint64   refPixelSize  = 0;
0831                 qlonglong refFileSize   = 0;
0832                 qlonglong reference     = *images2ScanIterator;
0833 
0834                 const bool useReferenceImages = ((refImageSelectionMethod == RefImageSelMethod::PreferFolder) ||
0835                                                  (refImageSelectionMethod == RefImageSelMethod::ExcludeFolder));
0836 
0837                 bool referenceFound = false;
0838 
0839                 if (useReferenceImages)
0840                 {
0841                     for (auto it = refs.begin() ; it != refs.end() ; ++it)
0842                     {
0843 
0844 #if ENABLE_DEBUG_DUPLICATES
0845 
0846                         {
0847                             ItemInfo info(*it);
0848                             const QString path = info.filePath();
0849                             const QString name = info.name();
0850                             DEBUG_DUPLICATES("\tReference image: " << name << "Path: " << path << ", Id: " << info.id());
0851                         }
0852 
0853 #endif
0854 
0855                         if (*it == *images2ScanIterator)
0856                         {
0857                             // image of images2ScanIterator is already in the references present, so take it as the
0858                             // reference
0859 
0860                             DEBUG_DUPLICATES("\tReference found!");
0861                             referenceFound = true;
0862                             break;
0863                         }
0864                     }
0865                 }
0866 
0867 
0868                 if (!useReferenceImages                                                               ||
0869                     (!referenceFound && (refImageSelectionMethod == RefImageSelMethod::PreferFolder)) ||
0870                     (referenceFound  && (refImageSelectionMethod == RefImageSelMethod::ExcludeFolder)))
0871                 {
0872                     DEBUG_DUPLICATES("\tChecking Duplicates")
0873 
0874                     Q_FOREACH (const qlonglong& refId, duplicates)
0875                     {
0876 
0877 #if ENABLE_DEBUG_DUPLICATES
0878 
0879                         {
0880                             ItemInfo info(refId);
0881                             const QString path = info.filePath();
0882                             const QString name = info.name();
0883                             DEBUG_DUPLICATES("\t\tDuplicates: " << name << "Path: " << path << ", Id: " << info.id());
0884                         }
0885 
0886 #endif
0887 
0888                         ItemInfo info(refId);
0889                         quint64 infoPixelSize = (quint64)info.dimensions().width() *
0890                                                 (quint64)info.dimensions().height();
0891 
0892                         bool referenceFound = false;
0893 
0894                         if (useReferenceImages)
0895                         {
0896                             for (auto it = refs.begin() ; it != refs.end() ; ++it)
0897                             {
0898 
0899 #if ENABLE_DEBUG_DUPLICATES
0900 
0901                                 {
0902                                     ItemInfo info(*it);
0903                                     const QString path = info.filePath();
0904                                     const QString name = info.name();
0905                                     DEBUG_DUPLICATES("\t\tReference image: " << name << "Path: " << path << ", Id: " << info.id());
0906                                 }
0907 
0908 #endif
0909 
0910                                 if (*it == refId)
0911                                 {
0912                                     DEBUG_DUPLICATES("\t\tReference found!");
0913                                     referenceFound = true;
0914                                     break;
0915                                 }
0916                             }
0917                         }
0918 
0919                         const bool preferFolderCond  = (referenceFound && (refImageSelectionMethod == RefImageSelMethod::PreferFolder));
0920 
0921                         const bool excludeFolderCond = (!referenceFound && (refImageSelectionMethod == RefImageSelMethod::ExcludeFolder));
0922 
0923                         const bool newerCreationCond = ((refImageSelectionMethod == RefImageSelMethod::NewerCreationDate) &&
0924                                                         (!refDateTime.isValid() || (info.dateTime() >  refDateTime)));
0925 
0926                         const bool newerModCond      = ((refImageSelectionMethod == RefImageSelMethod::NewerModificationDate) &&
0927                                                         (!refModDateTime.isValid() || (info.modDateTime() >  refModDateTime)));
0928 
0929                         const bool olderOrLargerCond = ((refImageSelectionMethod == RefImageSelMethod::OlderOrLarger)          &&
0930                                                         (!refDateTime.isValid()                                                ||
0931                                                         (infoPixelSize   >  refPixelSize)                                      ||
0932                                                         ((infoPixelSize  == refPixelSize) && (info.fileSize() >  refFileSize)) ||
0933                                                         ((infoPixelSize  == refPixelSize) && (info.fileSize() == refFileSize)  &&
0934                                                         (info.dateTime() <  refDateTime))));
0935 
0936                         if (preferFolderCond || excludeFolderCond || newerCreationCond || newerModCond || olderOrLargerCond)
0937                         {
0938                             reference      = refId;
0939                             refDateTime    = info.dateTime();
0940                             refModDateTime = info.modDateTime();
0941                             refFileSize    = info.fileSize();
0942                             refPixelSize   = infoPixelSize;
0943 
0944 #if ENABLE_DEBUG_DUPLICATES
0945 
0946                             {
0947                                 const QString path = info.filePath();
0948                                 const QString name = info.name();
0949                                 DEBUG_DUPLICATES("\t\tUse as eference image: " << name << "Path: " << path << ", Id: " << info.id() << "Pixelsize: " << infoPixelSize << ", File size: " << refFileSize << ", Datetime: " << refDateTime);
0950                             }
0951 
0952 #endif
0953 
0954                             if (preferFolderCond || excludeFolderCond)
0955                             {
0956                                 break;
0957                             }
0958                         }
0959                     }
0960                 }
0961 
0962                 resultsMap.insert(reference, qMakePair(bestMatches.first, duplicates));
0963 
0964                 resultsCandidates << *images2ScanIterator;
0965                 resultsCandidates.unite(QSet<qlonglong>(duplicates.begin(), duplicates.end()));
0966             }
0967         }
0968 
0969         // if an imageid is not a results candidate, remove it
0970         // from the cached signature map as well,
0971         // to greatly improve speed
0972 
0973         if (singleThread && !resultsCandidates.contains(*images2ScanIterator))
0974         {
0975             d->signatureCache()->remove(*images2ScanIterator);
0976         }
0977 
0978         if (observer)
0979         {
0980             observer->imageProcessed();
0981         }
0982     }
0983 #if ENABLE_DEBUG_DUPLICATES
0984 
0985     DEBUG_DUPLICATES("Results:");
0986 
0987     for (auto i = resultsMap.constBegin() ; i != resultsMap.constEnd() ; ++i)
0988     {
0989         ItemInfo info(i.key());
0990         const QString path = info.filePath();
0991         const QString name = info.name();
0992         DEBUG_DUPLICATES("\t\tReference image: " << name << "Path: " << path << ", Id: " << info.id());
0993     }
0994 
0995 #endif
0996 
0997     return resultsMap;
0998 }
0999 
1000 double HaarIface::calculateScore(const Haar::SignatureData& querySig,
1001                                  const Haar::SignatureData& targetSig,
1002                                  const Haar::Weights& weights,
1003                                  std::reference_wrapper<Haar::SignatureMap>* const queryMaps)
1004 {
1005     double score = 0.0;
1006 
1007     // Step 1: Initialize scores with average intensity values of all three channels
1008 
1009     for (int channel = 0 ; channel < 3 ; ++channel)
1010     {
1011         score += weights.weightForAverage(channel) * fabs(querySig.avg[channel] - targetSig.avg[channel]);
1012     }
1013 
1014     // Step 2: Decrease the score if query and target have significant coefficients in common
1015 
1016     int x        = 0;
1017 
1018     for (int channel = 0 ; channel < 3 ; ++channel)
1019     {
1020         const Haar::SignatureMap& queryMap = queryMaps[channel];
1021 
1022         for (int coef = 0 ; coef < Haar::NumberOfCoefficients ; ++coef)
1023         {
1024             // x is a pixel index, either positive or negative, 0..16384
1025 
1026             x = targetSig.sig[channel][coef];
1027 
1028             // If x is a significant coefficient with the same sign in the query signature as well,
1029             // decrease the score (lower is better)
1030             // Note: both method calls called with x accept positive or negative values
1031 
1032             if ((queryMap)[x])
1033             {
1034                 score -= weights.weight(d->weightBin.binAbs(x), channel);
1035             }
1036         }
1037     }
1038 
1039     return score;
1040 }
1041 
1042 } // namespace Digikam