preprocessing/shape-predictor/shapepredictor.h

0001 /* ============================================================
0002  *
0003  * This file is a part of digiKam project
0004  * https://www.digikam.org
0005  *
0006  * Date        : 16/08/2016
0007  * Description : A Shape predictor class that can predicts 68
0008  *               facial point including points surrounding faces
0009  *               eyes, that can be used for detecting human eyes
0010  *               positions, almost all codes are ported from dlib
0011  *               library (dlib.net/)
0012  *
0013  * SPDX-FileCopyrightText: 2016      by Omar Amin <Omar dot moh dot amin at gmail dot com>
0014  * SPDX-FileCopyrightText: 2019      by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com>
0015  * SPDX-FileCopyrightText: 2016-2024 by Gilles Caulier <caulier dot gilles at gmail dot com>
0016  *
0017  * SPDX-License-Identifier: GPL-2.0-or-later
0018  *
0019  * ============================================================ */
0020
0021 #ifndef DIGIKAM_SHAPE_PREDICTOR_H
0022 #define DIGIKAM_SHAPE_PREDICTOR_H
0023
0024 // C++ includes
0025
0026 #include <vector>
0027
0028 // Local includes
0029
0030 #include "digikam_opencv.h"
0031 #include "pointtransformaffine.h"
0032 #include "vectoroperations.h"
0033 #include "matrixoperations.h"
0034 #include "fullobjectdetection.h"
0035 #include "qdatastreamoverloads.h"
0036
0037 namespace Digikam
0038 {
0039
0040 namespace RedEye
0041 {
0042
0043 struct SplitFeature
0044 {
0045     unsigned long idx1;
0046     unsigned long idx2;
0047     float         thresh;
0048 };
0049
0050 QDataStream& operator << (QDataStream& dataStream, const SplitFeature& sp);
0051 QDataStream& operator >> (QDataStream& dataStream, SplitFeature& sp);
0052
0053 // NOTE: a tree is just a std::vector<RedEye::SplitFeature>. We use this function to navigate the tree nodes.
0054
0055 /**
0056  * Returns the index of the left child of the binary tree node idx
0057  */
0058 unsigned long left_child(unsigned long idx);
0059
0060 /**
0061  * Returns the index of the left child of the binary tree node idx
0062  */
0063 unsigned long right_child(unsigned long idx);
0064
0065 // ----------------------------------------------------------------------------------------
0066
0067 struct RegressionTree
0068 {
0069     std::vector<SplitFeature>        splits;
0070     std::vector<std::vector<float> > leaf_values;
0071
0072     unsigned long num_leaves() const;
0073
0074     /**
0075      * requires
0076      *     - All the index values in splits are less than feature_pixel_values.size()
0077      *     - leaf_values.size() is a power of 2.
0078      *          (i.e. we require a tree with all the levels fully filled out.
0079      *     - leaf_values.size() == splits.size()+1
0080      *          (i.e. there needs to be the right number of leaves given the number of splits in the tree)
0081      * ensures
0082      *      - runs through the tree and returns the vector at the leaf we end up in.
0083      *      - #i == the selected leaf node index.
0084      */
0085     const std::vector<float>& operator()(const std::vector<float>& feature_pixel_values,
0086                                          unsigned long& i) const;
0087 };
0088
0089 QDataStream& operator << (QDataStream& dataStream, const RegressionTree& regtree);
0090 QDataStream& operator >> (QDataStream& dataStream, RegressionTree& regtree);
0091
0092 /**
0093  * requires
0094  *      - idx < shape.size()/2
0095  *      - shape.size()%2 == 0
0096  *  ensures
0097  *      - returns the idx-th point from the shape vector.
0098  */
0099 template<class T>
0100 inline std::vector<T> location(const std::vector<T>& shape,
0101                                unsigned long idx)
0102 {
0103     std::vector<T> temp(2);
0104     temp[0] = shape[idx * 2    ];
0105     temp[1] = shape[idx * 2 + 1];
0106
0107     return temp;
0108 }
0109
0110 // ------------------------------------------------------------------------------------
0111
0112 unsigned long nearestShapePoint(const std::vector<float>& shape,
0113                                 const std::vector<float>& pt);
0114
0115 // ------------------------------------------------------------------------------------
0116
0117 /**
0118  *  requires
0119  *      - shape.size()%2 == 0
0120  *      - shape.size() > 0
0121  *  ensures
0122  *      - #anchor_idx.size() == pixel_coordinates.size()
0123  *      - #deltas.size()     == pixel_coordinates.size()
0124  *      - for all valid i:
0125  *      - pixel_coordinates[i] == location(shape,#anchor_idx[i]) + #deltas[i]
0126  */
0127 void createShapeRelativeEncoding(const std::vector<float>& shape,
0128                                  const std::vector<std::vector<float> >& pixel_coordinates,
0129                                  std::vector<unsigned long>& anchor_idx,
0130                                  std::vector<std::vector<float> >& deltas);
0131
0132 // ------------------------------------------------------------------------------------
0133
0134 PointTransformAffine findTformBetweenShapes(const std::vector<float>& from_shape,
0135                                             const std::vector<float>& to_shape);
0136
0137 // ------------------------------------------------------------------------------------
0138
0139 /**
0140  * Returns a transform that maps rect.tl_corner() to (0, 0) and rect.br_corner() to (1,1).
0141  */
0142 PointTransformAffine normalizingTform(const cv::Rect& rect);
0143
0144 // ------------------------------------------------------------------------------------
0145
0146 /**
0147  * returns a transform that maps (0, 0) to rect.tl_corner() and (1,1) to rect.br_corner().
0148  */
0149 PointTransformAffine unnormalizingTform(const cv::Rect& rect);
0150 bool pointContained(const cv::Rect& rect, const std::vector<float>& point);
0151
0152 // ------------------------------------------------------------------------------------
0153
0154 /**
0155  * requires
0156  *      - image_type == an image object that implements the interface defined in
0157  *          dlib/image_processing/generic_image.h
0158  *      - reference_pixel_anchor_idx.size() == reference_pixel_deltas.size()
0159  *      - current_shape.size() == reference_shape.size()
0160  *      - reference_shape.size()%2 == 0
0161  *      - max(mat(reference_pixel_anchor_idx)) < reference_shape.size()/2
0162  *  ensures
0163  *      - #feature_pixel_values.size() == reference_pixel_deltas.size()
0164  *      - for all valid i:
0165  *          - #feature_pixel_values[i] == the value of the pixel in img_ that
0166  *              corresponds to the pixel identified by reference_pixel_anchor_idx[i]
0167  *              and reference_pixel_deltas[i] when the pixel is located relative to
0168  *              current_shape rather than reference_shape.
0169  */
0170 void extractFeaturePixelValues(const cv::Mat& img_,
0171                                const cv::Rect& rect,
0172                                const std::vector<float>& current_shape,
0173                                const std::vector<float>& reference_shape,
0174                                const std::vector<unsigned long>& reference_pixel_anchor_idx,
0175                                const std::vector<std::vector<float> >& reference_pixel_deltas,
0176                                std::vector<float>& feature_pixel_values);
0177
0178 // ------------------------------------------------------------------------------------
0179
0180 class ShapePredictor
0181 {
0182 public:
0183
0184     explicit ShapePredictor();
0185
0186     unsigned long num_parts()                            const;
0187     unsigned long num_features()                         const;
0188
0189     FullObjectDetection operator()(const cv::Mat& img,
0190                                    const cv::Rect& rect) const;
0191
0192 public:
0193
0194     std::vector<float>                                initial_shape;
0195     std::vector<std::vector<RedEye::RegressionTree> > forests;
0196     std::vector<std::vector<unsigned long> >          anchor_idx;
0197     std::vector<std::vector<std::vector<float> > >    deltas;
0198 };
0199
0200 QDataStream& operator << (QDataStream& dataStream, const ShapePredictor& shape);
0201 QDataStream& operator >> (QDataStream& dataStream, ShapePredictor& shape);
0202
0203 } // namespace RedEye
0204
0205 } // namespace Digikam
0206
0207 #endif // DIGIKAM_SHAPE_PREDICTOR_H