File indexing completed on 2025-03-09 03:55:00

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam project
0004  * https://www.digikam.org
0005  *
0006  * Date        : 16/08/2016
0007  * Description : A Shape predictor class that can predicts 68
0008  *               facial point including points surrounding faces
0009  *               eyes, that can be used for detecting human eyes
0010  *               positions, almost all codes are ported from dlib
0011  *               library (dlib.net/)
0012  *
0013  * SPDX-FileCopyrightText: 2016      by Omar Amin <Omar dot moh dot amin at gmail dot com>
0014  * SPDX-FileCopyrightText: 2019      by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com>
0015  * SPDX-FileCopyrightText: 2016-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0016  *
0017  * SPDX-License-Identifier: GPL-2.0-or-later
0018  *
0019  * ============================================================ */
0020 
0021 #include "shapepredictor.h"
0022 
0023 namespace Digikam
0024 {
0025 
0026 namespace RedEye
0027 {
0028 
0029 QDataStream& operator << (QDataStream& dataStream, const SplitFeature& sp)
0030 {
0031     dataStream << sp.idx1 << sp.idx2 << sp.thresh;
0032 
0033     return dataStream;
0034 }
0035 
0036 QDataStream& operator >> (QDataStream& dataStream, SplitFeature& sp)
0037 {
0038     dataStream >> sp.idx1 >> sp.idx2 >> sp.thresh;
0039 
0040     return dataStream;
0041 }
0042 
0043 unsigned long left_child(unsigned long idx)
0044 {
0045     return (2*idx + 1);
0046 }
0047 
0048 unsigned long right_child(unsigned long idx)
0049 {
0050     return (2*idx + 2);
0051 }
0052 
0053 // ----------------------------------------------------------------------------------------
0054 
0055 unsigned long RegressionTree::num_leaves() const
0056 {
0057     return (unsigned long)leaf_values.size();
0058 }
0059 
0060 const std::vector<float>& RegressionTree::operator()(const std::vector<float>& feature_pixel_values, unsigned long& i) const
0061 {
0062     i = 0;
0063 
0064     while (i < splits.size())
0065     {
0066         if ((feature_pixel_values[splits[i].idx1] - feature_pixel_values[splits[i].idx2]) > splits[i].thresh)
0067         {
0068             i = left_child(i);
0069         }
0070         else
0071         {
0072             i = right_child(i);
0073         }
0074     }
0075 
0076     i = i - (unsigned long)splits.size();
0077 
0078     return leaf_values[i];
0079 }
0080 
0081 QDataStream& operator << (QDataStream& dataStream, const RegressionTree& regtree)
0082 {
0083     dataStream << (unsigned int)regtree.splits.size();
0084 
0085     for (unsigned int i = 0 ; i < regtree.splits.size() ; ++i)
0086     {
0087         dataStream << regtree.splits[i];
0088     }
0089 
0090     dataStream << (unsigned int)regtree.leaf_values.size();
0091     dataStream << (unsigned int)regtree.leaf_values[0].size();
0092 
0093     for (unsigned int i = 0 ; i < regtree.leaf_values.size() ; ++i)
0094     {
0095         for (unsigned int j = 0 ; j < regtree.leaf_values[i].size() ; ++j)
0096         {
0097             dataStream << regtree.leaf_values[i][j];
0098         }
0099 
0100     }
0101 
0102     return dataStream;
0103 }
0104 
0105 QDataStream& operator >> (QDataStream& dataStream, RegressionTree& regtree)
0106 {
0107     unsigned int size;
0108     dataStream >> size;
0109     regtree.splits.resize(size);
0110 
0111     for (unsigned int i = 0 ; i < regtree.splits.size() ; ++i)
0112     {
0113         dataStream >> regtree.splits[i];
0114     }
0115 
0116     dataStream >> size;
0117     regtree.leaf_values.resize(size);
0118     dataStream >> size;
0119 
0120     for (unsigned int i = 0 ; i < regtree.leaf_values.size() ; ++i)
0121     {
0122         regtree.leaf_values[i].resize(size);
0123 
0124         for (unsigned int j = 0 ; j < regtree.leaf_values[i].size() ; ++j)
0125         {
0126             dataStream >> regtree.leaf_values[i][j];
0127         }
0128     }
0129 
0130     return dataStream;
0131 }
0132 
0133 // ------------------------------------------------------------------------------------
0134 
0135 unsigned long nearestShapePoint(const std::vector<float>& shape,
0136                                 const std::vector<float>& pt)
0137 {
0138     // find the nearest part of the shape to this pixel
0139 
0140     float best_dist                     = std::numeric_limits<float>::infinity();
0141     const unsigned long num_shape_parts = (unsigned long)shape.size() / 2;
0142     unsigned long best_idx              = 0;
0143 
0144     for (unsigned long j = 0 ; j < num_shape_parts ; ++j)
0145     {
0146         const float dist = length_squared(location(shape, j) - pt);
0147 
0148         if (dist < best_dist)
0149         {
0150             best_dist = dist;
0151             best_idx  = j;
0152         }
0153     }
0154 
0155     return best_idx;
0156 }
0157 
0158 // ------------------------------------------------------------------------------------
0159 
0160 void createShapeRelativeEncoding(const std::vector<float>& shape,
0161                                  const std::vector<std::vector<float> >& pixel_coordinates,
0162                                  std::vector<unsigned long>& anchor_idx,
0163                                  std::vector<std::vector<float> >& deltas)
0164 {
0165     anchor_idx.resize(pixel_coordinates.size());
0166     deltas.resize(pixel_coordinates.size());
0167 
0168     for (unsigned long i = 0 ; i < pixel_coordinates.size() ; ++i)
0169     {
0170         anchor_idx[i] = nearestShapePoint(shape, pixel_coordinates[i]);
0171         deltas[i]     = pixel_coordinates[i] - location(shape, anchor_idx[i]);
0172     }
0173 }
0174 
0175 // ------------------------------------------------------------------------------------
0176 
0177 PointTransformAffine findTformBetweenShapes(const std::vector<float>& from_shape,
0178                                             const std::vector<float>& to_shape)
0179 {
0180     Q_ASSERT((from_shape.size() == to_shape.size()) &&
0181              ((from_shape.size() % 2) == 0)         &&
0182              (from_shape.size() > 0));
0183 
0184     std::vector<std::vector<float> > from_points, to_points;
0185     const unsigned long num = (unsigned long)from_shape.size() / 2;
0186     from_points.reserve(num);
0187     to_points.reserve(num);
0188 
0189     if (num == 1)
0190     {
0191         // Just use an identity transform if there is only one landmark.
0192 
0193         return PointTransformAffine();
0194     }
0195 
0196     for (unsigned long i = 0 ; i < num ; ++i)
0197     {
0198         from_points.push_back(location(from_shape, i));
0199         to_points.push_back(location(to_shape, i));
0200     }
0201 
0202     return (findSimilarityTransform(from_points, to_points));
0203 }
0204 
0205 // ------------------------------------------------------------------------------------
0206 
0207 PointTransformAffine normalizingTform(const cv::Rect& rect)
0208 {
0209     std::vector<std::vector<float> > from_points, to_points;
0210     std::vector<float> tlcorner(2);
0211     tlcorner[0] = rect.x;
0212     tlcorner[1] = rect.y;
0213     std::vector<float> trcorner(2);
0214     trcorner[0] = rect.x + rect.width;
0215     trcorner[1] = rect.y;
0216     std::vector<float> brcorner(2);
0217     brcorner[0] = rect.x + rect.width;
0218     brcorner[1] = rect.y + rect.height;
0219 
0220     std::vector<float> pt1(2);
0221     pt1[0]      = 0;
0222     pt1[1]      = 0;
0223     std::vector<float> pt2(2);
0224     pt2[0]      = 1;
0225     pt2[1]      = 0;
0226     std::vector<float> pt3(2);
0227     pt3[0]      = 1;
0228     pt3[1]      = 1;
0229     from_points.push_back(tlcorner);
0230     to_points.push_back(pt1);
0231     from_points.push_back(trcorner);
0232     to_points.push_back(pt2);
0233     from_points.push_back(brcorner);
0234     to_points.push_back(pt3);
0235 
0236     return (findAffineTransform(from_points, to_points));
0237 }
0238 
0239 // ------------------------------------------------------------------------------------
0240 
0241 PointTransformAffine unnormalizingTform(const cv::Rect& rect)
0242 {
0243     std::vector<std::vector<float> > from_points, to_points;
0244     std::vector<float> tlcorner(2);
0245     tlcorner[0] = rect.x;
0246     tlcorner[1] = rect.y;
0247     std::vector<float> trcorner(2);
0248     trcorner[0] = rect.x + rect.width;
0249     trcorner[1] = rect.y;
0250     std::vector<float> brcorner(2);
0251     brcorner[0] = rect.x + rect.width;
0252     brcorner[1] = rect.y + rect.height;
0253 
0254     std::vector<float> pt1(2);
0255     pt1[0]      = 0;
0256     pt1[1]      = 0;
0257     std::vector<float> pt2(2);
0258     pt2[0]      = 1;
0259     pt2[1]      = 0;
0260     std::vector<float> pt3(2);
0261     pt3[0]      = 1;
0262     pt3[1]      = 1;
0263 
0264     to_points.push_back(tlcorner);
0265     from_points.push_back(pt1);
0266     to_points.push_back(trcorner);
0267     from_points.push_back(pt2);
0268     to_points.push_back(brcorner);
0269     from_points.push_back(pt3);
0270 
0271     return (findAffineTransform(from_points, to_points));
0272 }
0273 
0274 bool pointContained(const cv::Rect& rect,
0275                     const std::vector<float>& point)
0276 {
0277     int x = std::round(point[0]);
0278     int y = std::round(point[1]);
0279 
0280     if ((x >= 0) && (x < rect.width)   &&
0281         (y >= 0) && (y < rect.height))
0282     {
0283         return true;
0284     }
0285     else
0286     {
0287         return false;
0288     }
0289 }
0290 
0291 // ------------------------------------------------------------------------------------
0292 
0293 void extractFeaturePixelValues(const cv::Mat& img_,
0294                                const cv::Rect& rect,
0295                                const std::vector<float>& current_shape,
0296                                const std::vector<float>& reference_shape,
0297                                const std::vector<unsigned long>& reference_pixel_anchor_idx,
0298                                const std::vector<std::vector<float> >& reference_pixel_deltas,
0299                                std::vector<float>& feature_pixel_values)
0300 {
0301     const std::vector<std::vector<float> > tform = findTformBetweenShapes(reference_shape, current_shape).get_m();
0302     const PointTransformAffine tform_to_img      = unnormalizingTform(rect);
0303     const cv::Rect area                          = cv::Rect(0, 0, img_.size().width, img_.size().height);
0304     cv::Mat img(img_);
0305     feature_pixel_values.resize(reference_pixel_deltas.size());
0306 
0307     for (unsigned long i = 0 ; i < feature_pixel_values.size() ; ++i)
0308     {
0309         // Compute the point in the current shape corresponding to the i-th pixel and
0310         // then map it from the normalized shape space into pixel space.
0311 
0312         std::vector<float> p = tform_to_img(tform*reference_pixel_deltas[i] + location(current_shape, reference_pixel_anchor_idx[i]));
0313 
0314         if (pointContained(area, p))
0315         {
0316             feature_pixel_values[i] = img.at<unsigned char>(std::round(p[1]), std::round(p[0]));
0317         }
0318         else
0319         {
0320             feature_pixel_values[i] = 0;
0321         }
0322     }
0323 }
0324 
0325 // ------------------------------------------------------------------------------------
0326 
0327 ShapePredictor::ShapePredictor()
0328 {
0329 }
0330 
0331 unsigned long ShapePredictor::num_parts() const
0332 {
0333     return ((unsigned long)initial_shape.size() / 2);
0334 }
0335 
0336 unsigned long ShapePredictor::num_features() const
0337 {
0338     unsigned long num = 0;
0339 
0340     for (unsigned long iter = 0 ; iter < forests.size() ; ++iter)
0341     {
0342         for (unsigned long i = 0 ; i < forests[iter].size() ; ++i)
0343         {
0344             num += forests[iter][i].num_leaves();
0345         }
0346     }
0347 
0348     return num;
0349 }
0350 
0351 FullObjectDetection ShapePredictor::operator()(const cv::Mat& img, const cv::Rect& rect) const
0352 {
0353     std::vector<float> current_shape = initial_shape;
0354     std::vector<float> feature_pixel_values;
0355 
0356     for (unsigned long iter = 0 ; iter < forests.size() ; ++iter)
0357     {
0358         extractFeaturePixelValues(img, rect, current_shape, initial_shape,
0359                                   anchor_idx[iter], deltas[iter], feature_pixel_values);
0360         unsigned long leaf_idx;
0361 
0362         // Evaluate all the trees at this level of the cascade.
0363 
0364         for (unsigned long i = 0 ; i < forests[iter].size() ; ++i)
0365         {
0366             current_shape = current_shape + forests[iter][i](feature_pixel_values, leaf_idx);
0367         }
0368     }
0369 
0370     // Convert the current_shape into a full_object_detection
0371 
0372     const PointTransformAffine tform_to_img = unnormalizingTform(rect);
0373     std::vector<std::vector<float> > parts(current_shape.size() / 2);
0374 
0375     for (unsigned long i = 0 ; i < parts.size() ; ++i)
0376     {
0377         parts[i] = tform_to_img(location(current_shape, i));
0378     }
0379 
0380     return FullObjectDetection(rect, parts);
0381 }
0382 
0383 // ------------------------------------------------------------------------------------
0384 
0385 QDataStream& operator << (QDataStream& dataStream, const ShapePredictor& shape)
0386 {
0387     dataStream << (unsigned int)shape.initial_shape.size();
0388 
0389     for (unsigned int i = 0 ; i < shape.initial_shape.size() ; ++i)
0390     {
0391         dataStream << shape.initial_shape[i];
0392     }
0393 
0394     dataStream<<(unsigned int)shape.forests.size();
0395     dataStream<<(unsigned int)shape.forests[0].size();
0396 
0397     for (unsigned int i = 0 ; i < shape.forests.size() ; ++i)
0398     {
0399         for (unsigned int j = 0 ; j < shape.forests[i].size() ; ++j)
0400         {
0401             dataStream << shape.forests[i][j];
0402         }
0403     }
0404 
0405     dataStream << (unsigned int)shape.anchor_idx.size();
0406     dataStream << (unsigned int)shape.anchor_idx[0].size();
0407 
0408     for (unsigned int i = 0 ; i < shape.anchor_idx.size() ; ++i)
0409     {
0410         for (unsigned int j = 0 ; j < shape.anchor_idx[i].size() ; ++j)
0411         {
0412             dataStream << shape.anchor_idx[i][j];
0413         }
0414 
0415     }
0416 
0417     dataStream << (unsigned int)shape.deltas.size();
0418     dataStream << (unsigned int)shape.deltas[0].size();
0419 
0420     for (unsigned int i = 0 ; i < shape.deltas.size() ; ++i)
0421     {
0422         for (unsigned int j = 0 ; j < shape.deltas[i].size() ; ++j)
0423         {
0424             dataStream << shape.deltas[i][j][0];
0425             dataStream << shape.deltas[i][j][1];
0426         }
0427     }
0428 
0429     return dataStream;
0430 }
0431 
0432 QDataStream& operator >> (QDataStream& dataStream, ShapePredictor& shape)
0433 {
0434     unsigned int size;
0435     dataStream >> size;
0436     shape.initial_shape.resize(size);
0437 
0438     for (unsigned int i = 0 ; i < shape.initial_shape.size() ; ++i)
0439     {
0440         dataStream >> shape.initial_shape[i];
0441     }
0442 
0443     dataStream >> size;
0444     shape.forests.resize(size);
0445     dataStream >> size;
0446 
0447     for (unsigned int i = 0 ; i < shape.forests.size() ; ++i)
0448     {
0449         shape.forests[i].resize(size);
0450 
0451         for (unsigned int j = 0 ; j < shape.forests[i].size() ; ++j)
0452         {
0453             dataStream >> shape.forests[i][j];
0454         }
0455     }
0456 
0457     dataStream >> size;
0458     shape.anchor_idx.resize(size);
0459     dataStream >> size;
0460 
0461     for (unsigned int i = 0 ; i < shape.anchor_idx.size() ; ++i)
0462     {
0463         shape.anchor_idx[i].resize(size);
0464 
0465         for (unsigned int j = 0 ; j < shape.anchor_idx[i].size() ; ++j)
0466         {
0467             dataStream >> shape.anchor_idx[i][j];
0468         }
0469 
0470     }
0471 
0472     dataStream >> size;
0473     shape.deltas.resize(size);
0474     dataStream >> size;
0475 
0476     for (unsigned int i = 0 ; i < shape.deltas.size() ; ++i)
0477     {
0478         shape.deltas[i].resize(size);
0479 
0480         for (unsigned int j = 0 ; j < shape.deltas[i].size() ; ++j)
0481         {
0482             shape.deltas[i][j].resize(2);
0483             dataStream >> shape.deltas[i][j][0];
0484             dataStream >> shape.deltas[i][j][1];
0485         }
0486     }
0487 
0488     return dataStream;
0489 }
0490 
0491 } // namespace RedEye
0492 
0493 } // namespace Digikam