preprocessing/shape-predictor/shapepredictor.cpp

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam project
0004  * https://www.digikam.org
0005  *
0006  * Date        : 16/08/2016
0007  * Description : A Shape predictor class that can predicts 68
0008  *               facial point including points surrounding faces
0009  *               eyes, that can be used for detecting human eyes
0010  *               positions, almost all codes are ported from dlib
0011  *               library (dlib.net/)
0012  *
0013  * SPDX-FileCopyrightText: 2016      by Omar Amin <Omar dot moh dot amin at gmail dot com>
0014  * SPDX-FileCopyrightText: 2019      by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com>
0015  * SPDX-FileCopyrightText: 2016-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0016  *
0017  * SPDX-License-Identifier: GPL-2.0-or-later
0018  *
0019  * ============================================================ */
0020
0021 #include "shapepredictor.h"
0022
0023 namespace Digikam
0024 {
0025
0026 namespace RedEye
0027 {
0028
0029 QDataStream& operator << (QDataStream& dataStream, const SplitFeature& sp)
0030 {
0031     dataStream << sp.idx1 << sp.idx2 << sp.thresh;
0032
0033     return dataStream;
0034 }
0035
0036 QDataStream& operator >> (QDataStream& dataStream, SplitFeature& sp)
0037 {
0038     dataStream >> sp.idx1 >> sp.idx2 >> sp.thresh;
0039
0040     return dataStream;
0041 }
0042
0043 unsigned long left_child(unsigned long idx)
0044 {
0045     return (2*idx + 1);
0046 }
0047
0048 unsigned long right_child(unsigned long idx)
0049 {
0050     return (2*idx + 2);
0051 }
0052
0053 // ----------------------------------------------------------------------------------------
0054
0055 unsigned long RegressionTree::num_leaves() const
0056 {
0057     return (unsigned long)leaf_values.size();
0058 }
0059
0060 const std::vector<float>& RegressionTree::operator()(const std::vector<float>& feature_pixel_values, unsigned long& i) const
0061 {
0062     i = 0;
0063
0064     while (i < splits.size())
0065     {
0066         if ((feature_pixel_values[splits[i].idx1] - feature_pixel_values[splits[i].idx2]) > splits[i].thresh)
0067         {
0068             i = left_child(i);
0069         }
0070         else
0071         {
0072             i = right_child(i);
0073         }
0074     }
0075
0076     i = i - (unsigned long)splits.size();
0077
0078     return leaf_values[i];
0079 }
0080
0081 QDataStream& operator << (QDataStream& dataStream, const RegressionTree& regtree)
0082 {
0083     dataStream << (unsigned int)regtree.splits.size();
0084
0085     for (unsigned int i = 0 ; i < regtree.splits.size() ; ++i)
0086     {
0087         dataStream << regtree.splits[i];
0088     }
0089
0090     dataStream << (unsigned int)regtree.leaf_values.size();
0091     dataStream << (unsigned int)regtree.leaf_values[0].size();
0092
0093     for (unsigned int i = 0 ; i < regtree.leaf_values.size() ; ++i)
0094     {
0095         for (unsigned int j = 0 ; j < regtree.leaf_values[i].size() ; ++j)
0096         {
0097             dataStream << regtree.leaf_values[i][j];
0098         }
0099
0100     }
0101
0102     return dataStream;
0103 }
0104
0105 QDataStream& operator >> (QDataStream& dataStream, RegressionTree& regtree)
0106 {
0107     unsigned int size;
0108     dataStream >> size;
0109     regtree.splits.resize(size);
0110
0111     for (unsigned int i = 0 ; i < regtree.splits.size() ; ++i)
0112     {
0113         dataStream >> regtree.splits[i];
0114     }
0115
0116     dataStream >> size;
0117     regtree.leaf_values.resize(size);
0118     dataStream >> size;
0119
0120     for (unsigned int i = 0 ; i < regtree.leaf_values.size() ; ++i)
0121     {
0122         regtree.leaf_values[i].resize(size);
0123
0124         for (unsigned int j = 0 ; j < regtree.leaf_values[i].size() ; ++j)
0125         {
0126             dataStream >> regtree.leaf_values[i][j];
0127         }
0128     }
0129
0130     return dataStream;
0131 }
0132
0133 // ------------------------------------------------------------------------------------
0134
0135 unsigned long nearestShapePoint(const std::vector<float>& shape,
0136                                 const std::vector<float>& pt)
0137 {
0138     // find the nearest part of the shape to this pixel
0139
0140     float best_dist                     = std::numeric_limits<float>::infinity();
0141     const unsigned long num_shape_parts = (unsigned long)shape.size() / 2;
0142     unsigned long best_idx              = 0;
0143
0144     for (unsigned long j = 0 ; j < num_shape_parts ; ++j)
0145     {
0146         const float dist = length_squared(location(shape, j) - pt);
0147
0148         if (dist < best_dist)
0149         {
0150             best_dist = dist;
0151             best_idx  = j;
0152         }
0153     }
0154
0155     return best_idx;
0156 }
0157
0158 // ------------------------------------------------------------------------------------
0159
0160 void createShapeRelativeEncoding(const std::vector<float>& shape,
0161                                  const std::vector<std::vector<float> >& pixel_coordinates,
0162                                  std::vector<unsigned long>& anchor_idx,
0163                                  std::vector<std::vector<float> >& deltas)
0164 {
0165     anchor_idx.resize(pixel_coordinates.size());
0166     deltas.resize(pixel_coordinates.size());
0167
0168     for (unsigned long i = 0 ; i < pixel_coordinates.size() ; ++i)
0169     {
0170         anchor_idx[i] = nearestShapePoint(shape, pixel_coordinates[i]);
0171         deltas[i]     = pixel_coordinates[i] - location(shape, anchor_idx[i]);
0172     }
0173 }
0174
0175 // ------------------------------------------------------------------------------------
0176
0177 PointTransformAffine findTformBetweenShapes(const std::vector<float>& from_shape,
0178                                             const std::vector<float>& to_shape)
0179 {
0180     Q_ASSERT((from_shape.size() == to_shape.size()) &&
0181              ((from_shape.size() % 2) == 0)         &&
0182              (from_shape.size() > 0));
0183
0184     std::vector<std::vector<float> > from_points, to_points;
0185     const unsigned long num = (unsigned long)from_shape.size() / 2;
0186     from_points.reserve(num);
0187     to_points.reserve(num);
0188
0189     if (num == 1)
0190     {
0191         // Just use an identity transform if there is only one landmark.
0192
0193         return PointTransformAffine();
0194     }
0195
0196     for (unsigned long i = 0 ; i < num ; ++i)
0197     {
0198         from_points.push_back(location(from_shape, i));
0199         to_points.push_back(location(to_shape, i));
0200     }
0201
0202     return (findSimilarityTransform(from_points, to_points));
0203 }
0204
0205 // ------------------------------------------------------------------------------------
0206
0207 PointTransformAffine normalizingTform(const cv::Rect& rect)
0208 {
0209     std::vector<std::vector<float> > from_points, to_points;
0210     std::vector<float> tlcorner(2);
0211     tlcorner[0] = rect.x;
0212     tlcorner[1] = rect.y;
0213     std::vector<float> trcorner(2);
0214     trcorner[0] = rect.x + rect.width;
0215     trcorner[1] = rect.y;
0216     std::vector<float> brcorner(2);
0217     brcorner[0] = rect.x + rect.width;
0218     brcorner[1] = rect.y + rect.height;
0219
0220     std::vector<float> pt1(2);
0221     pt1[0]      = 0;
0222     pt1[1]      = 0;
0223     std::vector<float> pt2(2);
0224     pt2[0]      = 1;
0225     pt2[1]      = 0;
0226     std::vector<float> pt3(2);
0227     pt3[0]      = 1;
0228     pt3[1]      = 1;
0229     from_points.push_back(tlcorner);
0230     to_points.push_back(pt1);
0231     from_points.push_back(trcorner);
0232     to_points.push_back(pt2);
0233     from_points.push_back(brcorner);
0234     to_points.push_back(pt3);
0235
0236     return (findAffineTransform(from_points, to_points));
0237 }
0238
0239 // ------------------------------------------------------------------------------------
0240
0241 PointTransformAffine unnormalizingTform(const cv::Rect& rect)
0242 {
0243     std::vector<std::vector<float> > from_points, to_points;
0244     std::vector<float> tlcorner(2);
0245     tlcorner[0] = rect.x;
0246     tlcorner[1] = rect.y;
0247     std::vector<float> trcorner(2);
0248     trcorner[0] = rect.x + rect.width;
0249     trcorner[1] = rect.y;
0250     std::vector<float> brcorner(2);
0251     brcorner[0] = rect.x + rect.width;
0252     brcorner[1] = rect.y + rect.height;
0253
0254     std::vector<float> pt1(2);
0255     pt1[0]      = 0;
0256     pt1[1]      = 0;
0257     std::vector<float> pt2(2);
0258     pt2[0]      = 1;
0259     pt2[1]      = 0;
0260     std::vector<float> pt3(2);
0261     pt3[0]      = 1;
0262     pt3[1]      = 1;
0263
0264     to_points.push_back(tlcorner);
0265     from_points.push_back(pt1);
0266     to_points.push_back(trcorner);
0267     from_points.push_back(pt2);
0268     to_points.push_back(brcorner);
0269     from_points.push_back(pt3);
0270
0271     return (findAffineTransform(from_points, to_points));
0272 }
0273
0274 bool pointContained(const cv::Rect& rect,
0275                     const std::vector<float>& point)
0276 {
0277     int x = std::round(point[0]);
0278     int y = std::round(point[1]);
0279
0280     if ((x >= 0) && (x < rect.width)   &&
0281         (y >= 0) && (y < rect.height))
0282     {
0283         return true;
0284     }
0285     else
0286     {
0287         return false;
0288     }
0289 }
0290
0291 // ------------------------------------------------------------------------------------
0292
0293 void extractFeaturePixelValues(const cv::Mat& img_,
0294                                const cv::Rect& rect,
0295                                const std::vector<float>& current_shape,
0296                                const std::vector<float>& reference_shape,
0297                                const std::vector<unsigned long>& reference_pixel_anchor_idx,
0298                                const std::vector<std::vector<float> >& reference_pixel_deltas,
0299                                std::vector<float>& feature_pixel_values)
0300 {
0301     const std::vector<std::vector<float> > tform = findTformBetweenShapes(reference_shape, current_shape).get_m();
0302     const PointTransformAffine tform_to_img      = unnormalizingTform(rect);
0303     const cv::Rect area                          = cv::Rect(0, 0, img_.size().width, img_.size().height);
0304     cv::Mat img(img_);
0305     feature_pixel_values.resize(reference_pixel_deltas.size());
0306
0307     for (unsigned long i = 0 ; i < feature_pixel_values.size() ; ++i)
0308     {
0309         // Compute the point in the current shape corresponding to the i-th pixel and
0310         // then map it from the normalized shape space into pixel space.
0311
0312         std::vector<float> p = tform_to_img(tform*reference_pixel_deltas[i] + location(current_shape, reference_pixel_anchor_idx[i]));
0313
0314         if (pointContained(area, p))
0315         {
0316             feature_pixel_values[i] = img.at<unsigned char>(std::round(p[1]), std::round(p[0]));
0317         }
0318         else
0319         {
0320             feature_pixel_values[i] = 0;
0321         }
0322     }
0323 }
0324
0325 // ------------------------------------------------------------------------------------
0326
0327 ShapePredictor::ShapePredictor()
0328 {
0329 }
0330
0331 unsigned long ShapePredictor::num_parts() const
0332 {
0333     return ((unsigned long)initial_shape.size() / 2);
0334 }
0335
0336 unsigned long ShapePredictor::num_features() const
0337 {
0338     unsigned long num = 0;
0339
0340     for (unsigned long iter = 0 ; iter < forests.size() ; ++iter)
0341     {
0342         for (unsigned long i = 0 ; i < forests[iter].size() ; ++i)
0343         {
0344             num += forests[iter][i].num_leaves();
0345         }
0346     }
0347
0348     return num;
0349 }
0350
0351 FullObjectDetection ShapePredictor::operator()(const cv::Mat& img, const cv::Rect& rect) const
0352 {
0353     std::vector<float> current_shape = initial_shape;
0354     std::vector<float> feature_pixel_values;
0355
0356     for (unsigned long iter = 0 ; iter < forests.size() ; ++iter)
0357     {
0358         extractFeaturePixelValues(img, rect, current_shape, initial_shape,
0359                                   anchor_idx[iter], deltas[iter], feature_pixel_values);
0360         unsigned long leaf_idx;
0361
0362         // Evaluate all the trees at this level of the cascade.
0363
0364         for (unsigned long i = 0 ; i < forests[iter].size() ; ++i)
0365         {
0366             current_shape = current_shape + forests[iter][i](feature_pixel_values, leaf_idx);
0367         }
0368     }
0369
0370     // Convert the current_shape into a full_object_detection
0371
0372     const PointTransformAffine tform_to_img = unnormalizingTform(rect);
0373     std::vector<std::vector<float> > parts(current_shape.size() / 2);
0374
0375     for (unsigned long i = 0 ; i < parts.size() ; ++i)
0376     {
0377         parts[i] = tform_to_img(location(current_shape, i));
0378     }
0379
0380     return FullObjectDetection(rect, parts);
0381 }
0382
0383 // ------------------------------------------------------------------------------------
0384
0385 QDataStream& operator << (QDataStream& dataStream, const ShapePredictor& shape)
0386 {
0387     dataStream << (unsigned int)shape.initial_shape.size();
0388
0389     for (unsigned int i = 0 ; i < shape.initial_shape.size() ; ++i)
0390     {
0391         dataStream << shape.initial_shape[i];
0392     }
0393
0394     dataStream<<(unsigned int)shape.forests.size();
0395     dataStream<<(unsigned int)shape.forests[0].size();
0396
0397     for (unsigned int i = 0 ; i < shape.forests.size() ; ++i)
0398     {
0399         for (unsigned int j = 0 ; j < shape.forests[i].size() ; ++j)
0400         {
0401             dataStream << shape.forests[i][j];
0402         }
0403     }
0404
0405     dataStream << (unsigned int)shape.anchor_idx.size();
0406     dataStream << (unsigned int)shape.anchor_idx[0].size();
0407
0408     for (unsigned int i = 0 ; i < shape.anchor_idx.size() ; ++i)
0409     {
0410         for (unsigned int j = 0 ; j < shape.anchor_idx[i].size() ; ++j)
0411         {
0412             dataStream << shape.anchor_idx[i][j];
0413         }
0414
0415     }
0416
0417     dataStream << (unsigned int)shape.deltas.size();
0418     dataStream << (unsigned int)shape.deltas[0].size();
0419
0420     for (unsigned int i = 0 ; i < shape.deltas.size() ; ++i)
0421     {
0422         for (unsigned int j = 0 ; j < shape.deltas[i].size() ; ++j)
0423         {
0424             dataStream << shape.deltas[i][j][0];
0425             dataStream << shape.deltas[i][j][1];
0426         }
0427     }
0428
0429     return dataStream;
0430 }
0431
0432 QDataStream& operator >> (QDataStream& dataStream, ShapePredictor& shape)
0433 {
0434     unsigned int size;
0435     dataStream >> size;
0436     shape.initial_shape.resize(size);
0437
0438     for (unsigned int i = 0 ; i < shape.initial_shape.size() ; ++i)
0439     {
0440         dataStream >> shape.initial_shape[i];
0441     }
0442
0443     dataStream >> size;
0444     shape.forests.resize(size);
0445     dataStream >> size;
0446
0447     for (unsigned int i = 0 ; i < shape.forests.size() ; ++i)
0448     {
0449         shape.forests[i].resize(size);
0450
0451         for (unsigned int j = 0 ; j < shape.forests[i].size() ; ++j)
0452         {
0453             dataStream >> shape.forests[i][j];
0454         }
0455     }
0456
0457     dataStream >> size;
0458     shape.anchor_idx.resize(size);
0459     dataStream >> size;
0460
0461     for (unsigned int i = 0 ; i < shape.anchor_idx.size() ; ++i)
0462     {
0463         shape.anchor_idx[i].resize(size);
0464
0465         for (unsigned int j = 0 ; j < shape.anchor_idx[i].size() ; ++j)
0466         {
0467             dataStream >> shape.anchor_idx[i][j];
0468         }
0469
0470     }
0471
0472     dataStream >> size;
0473     shape.deltas.resize(size);
0474     dataStream >> size;
0475
0476     for (unsigned int i = 0 ; i < shape.deltas.size() ; ++i)
0477     {
0478         shape.deltas[i].resize(size);
0479
0480         for (unsigned int j = 0 ; j < shape.deltas[i].size() ; ++j)
0481         {
0482             shape.deltas[i][j].resize(2);
0483             dataStream >> shape.deltas[i][j][0];
0484             dataStream >> shape.deltas[i][j][1];
0485         }
0486     }
0487
0488     return dataStream;
0489 }
0490
0491 } // namespace RedEye
0492
0493 } // namespace Digikam