File indexing completed on 2025-03-09 03:55:00
0001 /* ============================================================ 0002 * 0003 * This file is a part of digiKam project 0004 * https://www.digikam.org 0005 * 0006 * Date : 16/08/2016 0007 * Description : A Shape predictor class that can predicts 68 0008 * facial point including points surrounding faces 0009 * eyes, that can be used for detecting human eyes 0010 * positions, almost all codes are ported from dlib 0011 * library (dlib.net/) 0012 * 0013 * SPDX-FileCopyrightText: 2016 by Omar Amin <Omar dot moh dot amin at gmail dot com> 0014 * SPDX-FileCopyrightText: 2019 by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com> 0015 * SPDX-FileCopyrightText: 2016-2024 by Gilles Caulier <caulier dot gilles at gmail dot com> 0016 * 0017 * SPDX-License-Identifier: GPL-2.0-or-later 0018 * 0019 * ============================================================ */ 0020 0021 #include "shapepredictor.h" 0022 0023 namespace Digikam 0024 { 0025 0026 namespace RedEye 0027 { 0028 0029 QDataStream& operator << (QDataStream& dataStream, const SplitFeature& sp) 0030 { 0031 dataStream << sp.idx1 << sp.idx2 << sp.thresh; 0032 0033 return dataStream; 0034 } 0035 0036 QDataStream& operator >> (QDataStream& dataStream, SplitFeature& sp) 0037 { 0038 dataStream >> sp.idx1 >> sp.idx2 >> sp.thresh; 0039 0040 return dataStream; 0041 } 0042 0043 unsigned long left_child(unsigned long idx) 0044 { 0045 return (2*idx + 1); 0046 } 0047 0048 unsigned long right_child(unsigned long idx) 0049 { 0050 return (2*idx + 2); 0051 } 0052 0053 // ---------------------------------------------------------------------------------------- 0054 0055 unsigned long RegressionTree::num_leaves() const 0056 { 0057 return (unsigned long)leaf_values.size(); 0058 } 0059 0060 const std::vector<float>& RegressionTree::operator()(const std::vector<float>& feature_pixel_values, unsigned long& i) const 0061 { 0062 i = 0; 0063 0064 while (i < splits.size()) 0065 { 0066 if ((feature_pixel_values[splits[i].idx1] - feature_pixel_values[splits[i].idx2]) > splits[i].thresh) 0067 { 0068 i = left_child(i); 0069 } 0070 else 0071 { 0072 i = right_child(i); 0073 } 0074 } 0075 0076 i = i - (unsigned long)splits.size(); 0077 0078 return leaf_values[i]; 0079 } 0080 0081 QDataStream& operator << (QDataStream& dataStream, const RegressionTree& regtree) 0082 { 0083 dataStream << (unsigned int)regtree.splits.size(); 0084 0085 for (unsigned int i = 0 ; i < regtree.splits.size() ; ++i) 0086 { 0087 dataStream << regtree.splits[i]; 0088 } 0089 0090 dataStream << (unsigned int)regtree.leaf_values.size(); 0091 dataStream << (unsigned int)regtree.leaf_values[0].size(); 0092 0093 for (unsigned int i = 0 ; i < regtree.leaf_values.size() ; ++i) 0094 { 0095 for (unsigned int j = 0 ; j < regtree.leaf_values[i].size() ; ++j) 0096 { 0097 dataStream << regtree.leaf_values[i][j]; 0098 } 0099 0100 } 0101 0102 return dataStream; 0103 } 0104 0105 QDataStream& operator >> (QDataStream& dataStream, RegressionTree& regtree) 0106 { 0107 unsigned int size; 0108 dataStream >> size; 0109 regtree.splits.resize(size); 0110 0111 for (unsigned int i = 0 ; i < regtree.splits.size() ; ++i) 0112 { 0113 dataStream >> regtree.splits[i]; 0114 } 0115 0116 dataStream >> size; 0117 regtree.leaf_values.resize(size); 0118 dataStream >> size; 0119 0120 for (unsigned int i = 0 ; i < regtree.leaf_values.size() ; ++i) 0121 { 0122 regtree.leaf_values[i].resize(size); 0123 0124 for (unsigned int j = 0 ; j < regtree.leaf_values[i].size() ; ++j) 0125 { 0126 dataStream >> regtree.leaf_values[i][j]; 0127 } 0128 } 0129 0130 return dataStream; 0131 } 0132 0133 // ------------------------------------------------------------------------------------ 0134 0135 unsigned long nearestShapePoint(const std::vector<float>& shape, 0136 const std::vector<float>& pt) 0137 { 0138 // find the nearest part of the shape to this pixel 0139 0140 float best_dist = std::numeric_limits<float>::infinity(); 0141 const unsigned long num_shape_parts = (unsigned long)shape.size() / 2; 0142 unsigned long best_idx = 0; 0143 0144 for (unsigned long j = 0 ; j < num_shape_parts ; ++j) 0145 { 0146 const float dist = length_squared(location(shape, j) - pt); 0147 0148 if (dist < best_dist) 0149 { 0150 best_dist = dist; 0151 best_idx = j; 0152 } 0153 } 0154 0155 return best_idx; 0156 } 0157 0158 // ------------------------------------------------------------------------------------ 0159 0160 void createShapeRelativeEncoding(const std::vector<float>& shape, 0161 const std::vector<std::vector<float> >& pixel_coordinates, 0162 std::vector<unsigned long>& anchor_idx, 0163 std::vector<std::vector<float> >& deltas) 0164 { 0165 anchor_idx.resize(pixel_coordinates.size()); 0166 deltas.resize(pixel_coordinates.size()); 0167 0168 for (unsigned long i = 0 ; i < pixel_coordinates.size() ; ++i) 0169 { 0170 anchor_idx[i] = nearestShapePoint(shape, pixel_coordinates[i]); 0171 deltas[i] = pixel_coordinates[i] - location(shape, anchor_idx[i]); 0172 } 0173 } 0174 0175 // ------------------------------------------------------------------------------------ 0176 0177 PointTransformAffine findTformBetweenShapes(const std::vector<float>& from_shape, 0178 const std::vector<float>& to_shape) 0179 { 0180 Q_ASSERT((from_shape.size() == to_shape.size()) && 0181 ((from_shape.size() % 2) == 0) && 0182 (from_shape.size() > 0)); 0183 0184 std::vector<std::vector<float> > from_points, to_points; 0185 const unsigned long num = (unsigned long)from_shape.size() / 2; 0186 from_points.reserve(num); 0187 to_points.reserve(num); 0188 0189 if (num == 1) 0190 { 0191 // Just use an identity transform if there is only one landmark. 0192 0193 return PointTransformAffine(); 0194 } 0195 0196 for (unsigned long i = 0 ; i < num ; ++i) 0197 { 0198 from_points.push_back(location(from_shape, i)); 0199 to_points.push_back(location(to_shape, i)); 0200 } 0201 0202 return (findSimilarityTransform(from_points, to_points)); 0203 } 0204 0205 // ------------------------------------------------------------------------------------ 0206 0207 PointTransformAffine normalizingTform(const cv::Rect& rect) 0208 { 0209 std::vector<std::vector<float> > from_points, to_points; 0210 std::vector<float> tlcorner(2); 0211 tlcorner[0] = rect.x; 0212 tlcorner[1] = rect.y; 0213 std::vector<float> trcorner(2); 0214 trcorner[0] = rect.x + rect.width; 0215 trcorner[1] = rect.y; 0216 std::vector<float> brcorner(2); 0217 brcorner[0] = rect.x + rect.width; 0218 brcorner[1] = rect.y + rect.height; 0219 0220 std::vector<float> pt1(2); 0221 pt1[0] = 0; 0222 pt1[1] = 0; 0223 std::vector<float> pt2(2); 0224 pt2[0] = 1; 0225 pt2[1] = 0; 0226 std::vector<float> pt3(2); 0227 pt3[0] = 1; 0228 pt3[1] = 1; 0229 from_points.push_back(tlcorner); 0230 to_points.push_back(pt1); 0231 from_points.push_back(trcorner); 0232 to_points.push_back(pt2); 0233 from_points.push_back(brcorner); 0234 to_points.push_back(pt3); 0235 0236 return (findAffineTransform(from_points, to_points)); 0237 } 0238 0239 // ------------------------------------------------------------------------------------ 0240 0241 PointTransformAffine unnormalizingTform(const cv::Rect& rect) 0242 { 0243 std::vector<std::vector<float> > from_points, to_points; 0244 std::vector<float> tlcorner(2); 0245 tlcorner[0] = rect.x; 0246 tlcorner[1] = rect.y; 0247 std::vector<float> trcorner(2); 0248 trcorner[0] = rect.x + rect.width; 0249 trcorner[1] = rect.y; 0250 std::vector<float> brcorner(2); 0251 brcorner[0] = rect.x + rect.width; 0252 brcorner[1] = rect.y + rect.height; 0253 0254 std::vector<float> pt1(2); 0255 pt1[0] = 0; 0256 pt1[1] = 0; 0257 std::vector<float> pt2(2); 0258 pt2[0] = 1; 0259 pt2[1] = 0; 0260 std::vector<float> pt3(2); 0261 pt3[0] = 1; 0262 pt3[1] = 1; 0263 0264 to_points.push_back(tlcorner); 0265 from_points.push_back(pt1); 0266 to_points.push_back(trcorner); 0267 from_points.push_back(pt2); 0268 to_points.push_back(brcorner); 0269 from_points.push_back(pt3); 0270 0271 return (findAffineTransform(from_points, to_points)); 0272 } 0273 0274 bool pointContained(const cv::Rect& rect, 0275 const std::vector<float>& point) 0276 { 0277 int x = std::round(point[0]); 0278 int y = std::round(point[1]); 0279 0280 if ((x >= 0) && (x < rect.width) && 0281 (y >= 0) && (y < rect.height)) 0282 { 0283 return true; 0284 } 0285 else 0286 { 0287 return false; 0288 } 0289 } 0290 0291 // ------------------------------------------------------------------------------------ 0292 0293 void extractFeaturePixelValues(const cv::Mat& img_, 0294 const cv::Rect& rect, 0295 const std::vector<float>& current_shape, 0296 const std::vector<float>& reference_shape, 0297 const std::vector<unsigned long>& reference_pixel_anchor_idx, 0298 const std::vector<std::vector<float> >& reference_pixel_deltas, 0299 std::vector<float>& feature_pixel_values) 0300 { 0301 const std::vector<std::vector<float> > tform = findTformBetweenShapes(reference_shape, current_shape).get_m(); 0302 const PointTransformAffine tform_to_img = unnormalizingTform(rect); 0303 const cv::Rect area = cv::Rect(0, 0, img_.size().width, img_.size().height); 0304 cv::Mat img(img_); 0305 feature_pixel_values.resize(reference_pixel_deltas.size()); 0306 0307 for (unsigned long i = 0 ; i < feature_pixel_values.size() ; ++i) 0308 { 0309 // Compute the point in the current shape corresponding to the i-th pixel and 0310 // then map it from the normalized shape space into pixel space. 0311 0312 std::vector<float> p = tform_to_img(tform*reference_pixel_deltas[i] + location(current_shape, reference_pixel_anchor_idx[i])); 0313 0314 if (pointContained(area, p)) 0315 { 0316 feature_pixel_values[i] = img.at<unsigned char>(std::round(p[1]), std::round(p[0])); 0317 } 0318 else 0319 { 0320 feature_pixel_values[i] = 0; 0321 } 0322 } 0323 } 0324 0325 // ------------------------------------------------------------------------------------ 0326 0327 ShapePredictor::ShapePredictor() 0328 { 0329 } 0330 0331 unsigned long ShapePredictor::num_parts() const 0332 { 0333 return ((unsigned long)initial_shape.size() / 2); 0334 } 0335 0336 unsigned long ShapePredictor::num_features() const 0337 { 0338 unsigned long num = 0; 0339 0340 for (unsigned long iter = 0 ; iter < forests.size() ; ++iter) 0341 { 0342 for (unsigned long i = 0 ; i < forests[iter].size() ; ++i) 0343 { 0344 num += forests[iter][i].num_leaves(); 0345 } 0346 } 0347 0348 return num; 0349 } 0350 0351 FullObjectDetection ShapePredictor::operator()(const cv::Mat& img, const cv::Rect& rect) const 0352 { 0353 std::vector<float> current_shape = initial_shape; 0354 std::vector<float> feature_pixel_values; 0355 0356 for (unsigned long iter = 0 ; iter < forests.size() ; ++iter) 0357 { 0358 extractFeaturePixelValues(img, rect, current_shape, initial_shape, 0359 anchor_idx[iter], deltas[iter], feature_pixel_values); 0360 unsigned long leaf_idx; 0361 0362 // Evaluate all the trees at this level of the cascade. 0363 0364 for (unsigned long i = 0 ; i < forests[iter].size() ; ++i) 0365 { 0366 current_shape = current_shape + forests[iter][i](feature_pixel_values, leaf_idx); 0367 } 0368 } 0369 0370 // Convert the current_shape into a full_object_detection 0371 0372 const PointTransformAffine tform_to_img = unnormalizingTform(rect); 0373 std::vector<std::vector<float> > parts(current_shape.size() / 2); 0374 0375 for (unsigned long i = 0 ; i < parts.size() ; ++i) 0376 { 0377 parts[i] = tform_to_img(location(current_shape, i)); 0378 } 0379 0380 return FullObjectDetection(rect, parts); 0381 } 0382 0383 // ------------------------------------------------------------------------------------ 0384 0385 QDataStream& operator << (QDataStream& dataStream, const ShapePredictor& shape) 0386 { 0387 dataStream << (unsigned int)shape.initial_shape.size(); 0388 0389 for (unsigned int i = 0 ; i < shape.initial_shape.size() ; ++i) 0390 { 0391 dataStream << shape.initial_shape[i]; 0392 } 0393 0394 dataStream<<(unsigned int)shape.forests.size(); 0395 dataStream<<(unsigned int)shape.forests[0].size(); 0396 0397 for (unsigned int i = 0 ; i < shape.forests.size() ; ++i) 0398 { 0399 for (unsigned int j = 0 ; j < shape.forests[i].size() ; ++j) 0400 { 0401 dataStream << shape.forests[i][j]; 0402 } 0403 } 0404 0405 dataStream << (unsigned int)shape.anchor_idx.size(); 0406 dataStream << (unsigned int)shape.anchor_idx[0].size(); 0407 0408 for (unsigned int i = 0 ; i < shape.anchor_idx.size() ; ++i) 0409 { 0410 for (unsigned int j = 0 ; j < shape.anchor_idx[i].size() ; ++j) 0411 { 0412 dataStream << shape.anchor_idx[i][j]; 0413 } 0414 0415 } 0416 0417 dataStream << (unsigned int)shape.deltas.size(); 0418 dataStream << (unsigned int)shape.deltas[0].size(); 0419 0420 for (unsigned int i = 0 ; i < shape.deltas.size() ; ++i) 0421 { 0422 for (unsigned int j = 0 ; j < shape.deltas[i].size() ; ++j) 0423 { 0424 dataStream << shape.deltas[i][j][0]; 0425 dataStream << shape.deltas[i][j][1]; 0426 } 0427 } 0428 0429 return dataStream; 0430 } 0431 0432 QDataStream& operator >> (QDataStream& dataStream, ShapePredictor& shape) 0433 { 0434 unsigned int size; 0435 dataStream >> size; 0436 shape.initial_shape.resize(size); 0437 0438 for (unsigned int i = 0 ; i < shape.initial_shape.size() ; ++i) 0439 { 0440 dataStream >> shape.initial_shape[i]; 0441 } 0442 0443 dataStream >> size; 0444 shape.forests.resize(size); 0445 dataStream >> size; 0446 0447 for (unsigned int i = 0 ; i < shape.forests.size() ; ++i) 0448 { 0449 shape.forests[i].resize(size); 0450 0451 for (unsigned int j = 0 ; j < shape.forests[i].size() ; ++j) 0452 { 0453 dataStream >> shape.forests[i][j]; 0454 } 0455 } 0456 0457 dataStream >> size; 0458 shape.anchor_idx.resize(size); 0459 dataStream >> size; 0460 0461 for (unsigned int i = 0 ; i < shape.anchor_idx.size() ; ++i) 0462 { 0463 shape.anchor_idx[i].resize(size); 0464 0465 for (unsigned int j = 0 ; j < shape.anchor_idx[i].size() ; ++j) 0466 { 0467 dataStream >> shape.anchor_idx[i][j]; 0468 } 0469 0470 } 0471 0472 dataStream >> size; 0473 shape.deltas.resize(size); 0474 dataStream >> size; 0475 0476 for (unsigned int i = 0 ; i < shape.deltas.size() ; ++i) 0477 { 0478 shape.deltas[i].resize(size); 0479 0480 for (unsigned int j = 0 ; j < shape.deltas[i].size() ; ++j) 0481 { 0482 shape.deltas[i][j].resize(2); 0483 dataStream >> shape.deltas[i][j][0]; 0484 dataStream >> shape.deltas[i][j][1]; 0485 } 0486 } 0487 0488 return dataStream; 0489 } 0490 0491 } // namespace RedEye 0492 0493 } // namespace Digikam