// File indexing completed on 2025-01-05 03:58:07
0001 /* ============================================================ 0002 * 0003 * This file is a part of digiKam project 0004 * https://www.digikam.org 0005 * 0006 * Date : 2019-05-15 0007 * Description : CLI tool to test and verify Face Recognition 0008 * NOTE: This tool integrates the whole Face Management 0009 * work flow, especially designed to verify and benchmark 0010 * Face Recognition algorithm. It is adapted from recognize.cpp 0011 * developed by Aditya Bhatt. 0012 * 0013 * SPDX-FileCopyrightText: 2019 by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com> 0014 * 0015 * SPDX-License-Identifier: GPL-2.0-or-later 0016 * 0017 * ============================================================ */ 0018 0019 // Qt includes 0020 0021 #include <QCoreApplication> 0022 #include <QDir> 0023 #include <QImage> 0024 #include <QElapsedTimer> 0025 #include <QCommandLineParser> 0026 #include <QRectF> 0027 #include <QList> 0028 #include <QUuid> 0029 0030 // Local includes 0031 0032 #include "digikam_debug.h" 0033 #include "dimg.h" 0034 #include "facescansettings.h" 0035 #include "facedetector.h" 0036 #include "facialrecognition_wrapper.h" 0037 #include "coredbaccess.h" 0038 #include "dbengineparameters.h" 0039 0040 using namespace Digikam; 0041 0042 // -------------------------------------------------------------------------------------------------- 0043 0044 QStringList toPaths(char** argv, int startIndex, int argc) 0045 { 0046 QStringList files; 0047 0048 for (int i = startIndex ; i < argc ; ++i) 0049 { 0050 files << QString::fromLatin1(argv[i]); 0051 } 0052 0053 return files; 0054 } 0055 0056 QList<QImage> toImages(const QStringList& paths) 0057 { 0058 QList<QImage> images; 0059 0060 Q_FOREACH (const QString& path, paths) 0061 { 0062 images << QImage(path); 0063 } 0064 0065 return images; 0066 } 0067 0068 void prepareForTrain(const QString& testSetPath, 0069 QMap<unsigned, QStringList>& testset, 0070 QMap<unsigned, QStringList>& trainingset, 0071 double ratio, 0072 
unsigned int nbOfSamples, 0073 unsigned int& nbOfIdentities) 0074 { 0075 QDir testSet(testSetPath); 0076 QStringList subjects = testSet.entryList(QDir::Dirs | QDir::NoDotAndDotDot | QDir::NoSymLinks); 0077 0078 qCDebug(DIGIKAM_TESTS_LOG) << nbOfSamples << ", " << nbOfIdentities; 0079 0080 if (nbOfIdentities == 0) 0081 { 0082 nbOfIdentities = subjects.size(); 0083 } 0084 0085 for (unsigned i = 1 ; i <= nbOfIdentities ; ++i) 0086 { 0087 QString subjectPath = QString::fromLatin1("%1%2").arg(testSetPath) 0088 .arg(subjects.takeFirst()); 0089 QDir subjectDir(subjectPath); 0090 0091 QStringList files = subjectDir.entryList(QDir::Files); 0092 unsigned int nbOfSamplePerIdentity = (nbOfSamples == 0) ? files.size() : nbOfSamples; 0093 0094 for (unsigned j = 1 ; j <= nbOfSamplePerIdentity ; ++j) 0095 { 0096 QString path = QString::fromLatin1("%1/%2").arg(subjectPath) 0097 .arg(files.takeFirst()); 0098 0099 if (j <= static_cast<unsigned int>(qRound(nbOfSamplePerIdentity * ratio))) 0100 { 0101 trainingset[i] << path; 0102 qCDebug(DIGIKAM_TESTS_LOG) << "training " << path; 0103 } 0104 else 0105 { 0106 testset[i] << path; 0107 qCDebug(DIGIKAM_TESTS_LOG) << "test " << path; 0108 } 0109 } 0110 } 0111 } 0112 0113 QImage scaleForDetection(const DImg& image, FaceDetector& detector) 0114 { 0115 int recommendedSize = detector.recommendedImageSize(image.size()); 0116 0117 if (qMax(image.width(), image.height()) > (uint)recommendedSize) 0118 { 0119 return image.smoothScale(recommendedSize, recommendedSize, Qt::KeepAspectRatio).copyQImage(); 0120 } 0121 0122 return image.copyQImage(); 0123 } 0124 0125 QList<QRectF> processFaceDetection(const QImage& image, FaceDetector& detector) 0126 { 0127 DImg img(image); 0128 QImage detectionImage = scaleForDetection(img, detector); 0129 QList<QRectF> detectedFaces = detector.detectFaces(detectionImage, img.originalSize()); 0130 0131 qCDebug(DIGIKAM_TESTS_LOG) << "Found " << detectedFaces.size() << " faces"; 0132 0133 return detectedFaces; 0134 } 0135 
0136 QList<QRectF> processFaceDetection(const QString& imagePath, FaceDetector& detector) 0137 { 0138 QList<QRectF> detectedFaces = detector.detectFaces(imagePath); 0139 0140 qCDebug(DIGIKAM_TESTS_LOG) << "(Input CV) Found " << detectedFaces.size() << " faces"; 0141 0142 return detectedFaces; 0143 } 0144 0145 QImage retrieveFace(const DImg& image, const QList<QRectF>& rects) 0146 { 0147 if (rects.size() > 1) 0148 { 0149 qFatal("More than 1 face found in image, strange for our test set!!!"); 0150 } 0151 0152 QRectF rect = rects.first(); 0153 QImage face = image.copyQImage(rect); 0154 0155 return face; 0156 } 0157 0158 QList<QImage*> retrieveFaces(const QList<QImage>& images, const QList<QRectF>& rects) 0159 { 0160 QList<QImage*> faces; 0161 unsigned index = 0; 0162 0163 Q_FOREACH (const QRectF& rect, rects) 0164 { 0165 DImg temp(images.at(index)); 0166 QImage* croppedFace = new QImage(); 0167 *croppedFace = temp.copyQImage(rect); 0168 0169 faces << croppedFace; 0170 ++index; 0171 } 0172 0173 return faces; 0174 } 0175 0176 // -------------------------------------------------------------------------------------------------- 0177 0178 int main(int argc, char* argv[]) 0179 { 0180 QCoreApplication app(argc, argv); 0181 app.setApplicationName(QString::fromLatin1("digikam")); // for DB init. 
0182 0183 // Options for commandline parser 0184 0185 QCommandLineParser parser; 0186 parser.addOption(QCommandLineOption(QLatin1String("db"), QLatin1String("Faces database"), QLatin1String("path to db folder"))); 0187 parser.addOption(QCommandLineOption(QLatin1String("rs"), QLatin1String("Split ratio (test set / whole set)"), QLatin1String("decimal"))); 0188 parser.addOption(QCommandLineOption(QLatin1String("ts"), QLatin1String("Test set folder"), QLatin1String("path relative to db folder"))); 0189 parser.addOption(QCommandLineOption(QLatin1String("ds"), QLatin1String("Training set (dev set) folder"), QLatin1String("path relative to db folder"))); 0190 parser.addOption(QCommandLineOption(QLatin1String("ni"), QLatin1String("Number of total objects"), QLatin1String("nbIdentities"))); 0191 parser.addOption(QCommandLineOption(QLatin1String("ns"), QLatin1String("Number of samples per object"), QLatin1String("nbSamples"))); 0192 parser.addOption(QCommandLineOption(QLatin1String("as"), QLatin1String("Option to run test on the entire set"))); 0193 parser.addHelpOption(); 0194 parser.process(app); 0195 0196 // Parse arguments 0197 0198 bool optionErrors = false; 0199 0200 if (parser.optionNames().empty()) 0201 { 0202 qCWarning(DIGIKAM_TESTS_LOG) << "NO options!!!"; 0203 optionErrors = true; 0204 } 0205 else if (!parser.isSet(QLatin1String("db"))) 0206 { 0207 qCWarning(DIGIKAM_TESTS_LOG) << "MISSING database for test!!!"; 0208 optionErrors = true; 0209 } 0210 else if (!parser.isSet(QLatin1String("as")) && 0211 (!parser.isSet(QLatin1String("ni")) || !parser.isSet(QLatin1String("ns")))) 0212 { 0213 qCWarning(DIGIKAM_TESTS_LOG) << "UNKNOWN training set / test set separation!!!"; 0214 optionErrors = true; 0215 } 0216 else if (parser.isSet(QLatin1String("ts")) && !parser.isSet(QLatin1String("ds"))) 0217 { 0218 qCWarning(DIGIKAM_TESTS_LOG) << "UNKNOWN Dev set!!!"; 0219 optionErrors = true; 0220 } 0221 else if (parser.isSet(QLatin1String("ds")) && 
!parser.isSet(QLatin1String("ts"))) 0222 { 0223 qCWarning(DIGIKAM_TESTS_LOG) << "UNKNOWN Test set!!!"; 0224 optionErrors = true; 0225 } 0226 0227 if (optionErrors) 0228 { 0229 parser.showHelp(); 0230 return 1; 0231 } 0232 0233 QString facedb = parser.value(QLatin1String("db")); 0234 unsigned int nbOfSamples = 0; 0235 unsigned int nbOfIdentities = 0; 0236 0237 if (!parser.isSet(QLatin1String("as"))) 0238 { 0239 nbOfSamples = parser.value(QLatin1String("ns")).toUInt(); 0240 nbOfIdentities = parser.value(QLatin1String("ni")).toUInt(); 0241 } 0242 0243 double ratio = 0.0; 0244 0245 if (parser.isSet(QLatin1String("rs"))) 0246 { 0247 ratio = parser.value(QLatin1String("rs")).toDouble(); 0248 } 0249 0250 // Init config for digiKam 0251 0252 DbEngineParameters prm = DbEngineParameters::parametersFromConfig(); 0253 CoreDbAccess::setParameters(prm, CoreDbAccess::MainApplication); 0254 FacialRecognitionWrapper recognizer; 0255 0256 // Construct training set, test set 0257 0258 QMap<unsigned, QStringList> testset, trainingset; 0259 0260 if (ratio > 0.0) 0261 { 0262 prepareForTrain(facedb, testset, trainingset, ratio, nbOfSamples, nbOfIdentities); 0263 } 0264 else 0265 { 0266 QString testsetFolder = parser.value(QLatin1String("ts")); 0267 QString trainingsetFoler = parser.value(QLatin1String("ds")); 0268 0269 // TODO: Overload of prepareForTrain() to create training set and test set here 0270 } 0271 0272 // Create IDs 0273 0274 QMap<unsigned, Identity> idMap; 0275 0276 for (unsigned i = 1 ; i <= nbOfIdentities ; ++i) 0277 { 0278 QMultiMap<QString, QString> attributes; 0279 attributes.insert(QLatin1String("name"), QString::number(i)); 0280 idMap[i] = recognizer.addIdentityDebug(attributes); 0281 } 0282 0283 // Init FaceDetector used for detecting faces and bounding box 0284 // before recognizing 0285 0286 FaceDetector detector; 0287 0288 // Evaluation metrics 0289 unsigned int correct = 0, notRecognized = 0, falsePositive = 0, totalTrained = 0, totalRecognized = 0; 0290 unsigned 
int elapsedTraining = 0, elapsedTesting = 0; 0291 unsigned int detectingTime = 0; 0292 0293 /* 0294 * // Without using detector 0295 0296 for (QMap<unsigned, QStringList>::const_iterator it = trainingset.constBegin() ; 0297 it != trainingset.constEnd() ; ++it) 0298 { 0299 Identity identity = recognizer.findIdentity(QString::fromLatin1("name"), QString::number(it.key())); 0300 0301 if (identity.isNull()) 0302 { 0303 qCDebug(DIGIKAM_TESTS_LOG) << "Identity management failed for person " << it.key(); 0304 } 0305 0306 QList<QImage> images = toImages(it.value()); 0307 qCDebug(DIGIKAM_TESTS_LOG) << "Training directory " << it.key(); 0308 0309 recognizer.train(identity, images, trainingContext); 0310 totalTrained += images.size(); 0311 } 0312 0313 elapsedTraining = timer.restart(); 0314 0315 for (QMap<unsigned, QStringList>::const_iterator it = testset.constBegin() ; 0316 it != testset.constEnd() ; ++it) 0317 { 0318 Identity identity = idMap.value(it.key()); 0319 QList<QImage> images = toImages(it.value()); 0320 QList<Identity> results = recognizer.recognizeFaces(images); 0321 0322 qCDebug(DIGIKAM_TESTS_LOG) << "Result for " << it.value().first() << " is identity " << results.first().id(); 0323 0324 Q_FOREACH (const Identity& foundId, results) 0325 { 0326 if (foundId.isNull()) 0327 { 0328 ++notRecognized; 0329 } 0330 else if (foundId == identity) 0331 { 0332 ++correct; 0333 } 0334 else 0335 { 0336 ++falsePositive; 0337 } 0338 } 0339 0340 totalRecognized += images.size(); 0341 } 0342 0343 */ 0344 0345 QStringList undetectedTrainedFaces; 0346 QStringList undetectedTestedFaces; 0347 QStringList falsePositiveFaces; 0348 0349 QLatin1String trainingContext("Debug"); 0350 0351 for (QMap<unsigned, QStringList>::const_iterator it = trainingset.constBegin() ; 0352 it != trainingset.constEnd() ; ++it) 0353 { 0354 Identity identity = idMap.value(it.key()); 0355 QStringList imagePaths = it.value(); 0356 0357 QList<QImage> detectedFaces; 0358 QList<QRectF> bboxes; 0359 0360 
QList<QImage> rawImages = toImages(imagePaths); 0361 qCDebug(DIGIKAM_TESTS_LOG) << "Training directory " << it.key(); 0362 0363 Q_FOREACH (const QImage& image, rawImages) 0364 { 0365 QString imagePath = imagePaths.takeFirst(); 0366 0367 // Start timing for benchmark face detection 0368 0369 QElapsedTimer timer; 0370 timer.start(); 0371 0372 QList<QRectF> detectedBoundingBox = processFaceDetection(imagePath, detector); 0373 0374 detectingTime += timer.elapsed(); 0375 0376 if (detectedBoundingBox.size()) 0377 { 0378 detectedFaces << image; 0379 bboxes << detectedBoundingBox.first(); 0380 ++totalTrained; 0381 } 0382 else 0383 { 0384 undetectedTrainedFaces << imagePath; 0385 } 0386 } 0387 0388 QList<QImage*> faces = retrieveFaces(detectedFaces, bboxes); 0389 0390 // Start timing for benchmark training 0391 0392 QElapsedTimer timer; 0393 timer.start(); 0394 0395 recognizer.train(identity, faces, trainingContext); 0396 0397 elapsedTraining += timer.elapsed(); 0398 } 0399 0400 for (QMap<unsigned, QStringList>::const_iterator it = testset.constBegin() ; 0401 it != testset.constEnd() ; ++it) 0402 { 0403 Identity identity = idMap.value(it.key()); 0404 QList<QImage> rawImages = toImages(it.value()); 0405 QStringList imagePaths = it.value(); 0406 0407 QList<QImage> detectedFaces; 0408 QList<QRectF> bboxes; 0409 0410 Q_FOREACH (const QImage& image, rawImages) 0411 { 0412 QString imagePath = imagePaths.takeFirst(); 0413 0414 // Start timing for benchmark face detection 0415 0416 QElapsedTimer timer; 0417 timer.start(); 0418 0419 QList<QRectF> detectedBoundingBox = processFaceDetection(imagePath, detector); 0420 detectingTime += timer.elapsed(); 0421 0422 if (detectedBoundingBox.size()) 0423 { 0424 detectedFaces << image; 0425 bboxes << detectedBoundingBox.first(); 0426 ++totalRecognized; 0427 } 0428 else 0429 { 0430 undetectedTestedFaces << imagePath; 0431 } 0432 0433 imagePaths << imagePath; 0434 } 0435 0436 QList<QImage*> faces = retrieveFaces(detectedFaces, bboxes); 0437 0438 
// Start timing for benchmark testing 0439 0440 QElapsedTimer timer; 0441 timer.start(); 0442 0443 QList<Identity> results = recognizer.recognizeFaces(faces); 0444 elapsedTesting += timer.elapsed(); 0445 0446 // qCDebug(DIGIKAM_TESTS_LOG) << "Result for " << it.value().first() << " is identity " << results.first().id(); 0447 0448 Q_FOREACH (const Identity& foundId, results) 0449 { 0450 QString imagePath = imagePaths.takeFirst(); 0451 0452 if (foundId.isNull()) 0453 { 0454 ++notRecognized; 0455 } 0456 else if (foundId == identity) 0457 { 0458 ++correct; 0459 } 0460 else 0461 { 0462 ++falsePositive; 0463 falsePositiveFaces << QString::fromLatin1("Image at %1 with identity %2") 0464 .arg(imagePath) 0465 .arg(foundId.id()); 0466 } 0467 } 0468 0469 // totalRecognized += images.size(); 0470 } 0471 0472 unsigned nbUndetectedTrainedFaces = undetectedTrainedFaces.size(); 0473 qCDebug(DIGIKAM_TESTS_LOG) << "\n" << nbUndetectedTrainedFaces << " / " << totalTrained + nbUndetectedTrainedFaces 0474 << " (" << float(nbUndetectedTrainedFaces) / (totalTrained + nbUndetectedTrainedFaces) * 100 << "%)" 0475 << " faces cannot be detected for training"; 0476 0477 Q_FOREACH (const QString& path, undetectedTrainedFaces) 0478 { 0479 qCDebug(DIGIKAM_TESTS_LOG) << path; 0480 } 0481 0482 if (totalTrained) 0483 { 0484 qCDebug(DIGIKAM_TESTS_LOG) << "Training " << totalTrained << "of " << nbOfIdentities 0485 << " different objects took " << elapsedTraining << " ms, " 0486 << ((float)elapsedTraining/totalTrained) << " ms per image"; 0487 } 0488 0489 unsigned nbUndetectedTestedFaces = undetectedTestedFaces.size(); 0490 qCDebug(DIGIKAM_TESTS_LOG) << "\n" << nbUndetectedTestedFaces << " / " << totalRecognized + nbUndetectedTestedFaces 0491 << " (" << float(nbUndetectedTestedFaces) / (totalRecognized + nbUndetectedTestedFaces) * 100 << "%)" 0492 << " faces cannot be detected for testing"; 0493 0494 Q_FOREACH (const QString& path, undetectedTestedFaces) 0495 { 0496 qCDebug(DIGIKAM_TESTS_LOG) << path; 
0497 } 0498 0499 if (totalRecognized) 0500 { 0501 qCDebug(DIGIKAM_TESTS_LOG) << "Recognition test performed on " << totalRecognized << " of " << nbOfIdentities << " different objects took " << elapsedTesting << " ms, " << ((float)elapsedTesting/totalRecognized) << " ms per image"; 0502 qCDebug(DIGIKAM_TESTS_LOG) << correct << " / " << totalRecognized << " (" << (float(correct) / totalRecognized*100) << "%) were correctly recognized"; 0503 qCDebug(DIGIKAM_TESTS_LOG) << falsePositive << " / " << totalRecognized << " (" << (float(falsePositive) / totalRecognized*100) << "%) were falsely assigned to an identity (false positive)"; 0504 qCDebug(DIGIKAM_TESTS_LOG) << notRecognized << " / " << totalRecognized << " (" << (float(notRecognized) / totalRecognized*100) << "%) were not recognized"; 0505 } 0506 else 0507 { 0508 qCDebug(DIGIKAM_TESTS_LOG) << "No face recognized"; 0509 } 0510 0511 qCDebug(DIGIKAM_TESTS_LOG) << "\nFalse positive faces"; 0512 0513 Q_FOREACH (const QString& path, falsePositiveFaces) 0514 { 0515 qCDebug(DIGIKAM_TESTS_LOG) << path; 0516 } 0517 0518 qCDebug(DIGIKAM_TESTS_LOG) << "\n Average time of face detection " 0519 << detectingTime*1.0 / (totalTrained + nbUndetectedTrainedFaces + totalRecognized + nbUndetectedTestedFaces) 0520 << "ms"; 0521 0522 return 0; 0523 }