File indexing completed on 2025-03-09 03:55:00
0001 /* ============================================================ 0002 * 0003 * This file is a part of digiKam project 0004 * https://www.digikam.org 0005 * 0006 * Date : 16/08/2016 0007 * Description : A Shape predictor class that can predicts 68 0008 * facial point including points surrounding faces 0009 * eyes, that can be used for detecting human eyes 0010 * positions, almost all codes are ported from dlib 0011 * library (dlib.net/) 0012 * 0013 * SPDX-FileCopyrightText: 2016 by Omar Amin <Omar dot moh dot amin at gmail dot com> 0014 * SPDX-FileCopyrightText: 2019 by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com> 0015 * SPDX-FileCopyrightText: 2016-2024 by Gilles Caulier <caulier dot gilles at gmail dot com> 0016 * 0017 * SPDX-License-Identifier: GPL-2.0-or-later 0018 * 0019 * ============================================================ */ 0020 0021 #ifndef DIGIKAM_SHAPE_PREDICTOR_H 0022 #define DIGIKAM_SHAPE_PREDICTOR_H 0023 0024 // C++ includes 0025 0026 #include <vector> 0027 0028 // Local includes 0029 0030 #include "digikam_opencv.h" 0031 #include "pointtransformaffine.h" 0032 #include "vectoroperations.h" 0033 #include "matrixoperations.h" 0034 #include "fullobjectdetection.h" 0035 #include "qdatastreamoverloads.h" 0036 0037 namespace Digikam 0038 { 0039 0040 namespace RedEye 0041 { 0042 0043 struct SplitFeature 0044 { 0045 unsigned long idx1; 0046 unsigned long idx2; 0047 float thresh; 0048 }; 0049 0050 QDataStream& operator << (QDataStream& dataStream, const SplitFeature& sp); 0051 QDataStream& operator >> (QDataStream& dataStream, SplitFeature& sp); 0052 0053 // NOTE: a tree is just a std::vector<RedEye::SplitFeature>. We use this function to navigate the tree nodes. 0054 0055 /** 0056 * Returns the index of the left child of the binary tree node idx 0057 */ 0058 unsigned long left_child(unsigned long idx); 0059 0060 /** 0061 * Returns the index of the left child of the binary tree node idx 0062 */ 0063 unsigned long right_child(unsigned long idx); 0064 0065 // ---------------------------------------------------------------------------------------- 0066 0067 struct RegressionTree 0068 { 0069 std::vector<SplitFeature> splits; 0070 std::vector<std::vector<float> > leaf_values; 0071 0072 unsigned long num_leaves() const; 0073 0074 /** 0075 * requires 0076 * - All the index values in splits are less than feature_pixel_values.size() 0077 * - leaf_values.size() is a power of 2. 0078 * (i.e. we require a tree with all the levels fully filled out. 0079 * - leaf_values.size() == splits.size()+1 0080 * (i.e. there needs to be the right number of leaves given the number of splits in the tree) 0081 * ensures 0082 * - runs through the tree and returns the vector at the leaf we end up in. 0083 * - #i == the selected leaf node index. 0084 */ 0085 const std::vector<float>& operator()(const std::vector<float>& feature_pixel_values, 0086 unsigned long& i) const; 0087 }; 0088 0089 QDataStream& operator << (QDataStream& dataStream, const RegressionTree& regtree); 0090 QDataStream& operator >> (QDataStream& dataStream, RegressionTree& regtree); 0091 0092 /** 0093 * requires 0094 * - idx < shape.size()/2 0095 * - shape.size()%2 == 0 0096 * ensures 0097 * - returns the idx-th point from the shape vector. 0098 */ 0099 template<class T> 0100 inline std::vector<T> location(const std::vector<T>& shape, 0101 unsigned long idx) 0102 { 0103 std::vector<T> temp(2); 0104 temp[0] = shape[idx * 2 ]; 0105 temp[1] = shape[idx * 2 + 1]; 0106 0107 return temp; 0108 } 0109 0110 // ------------------------------------------------------------------------------------ 0111 0112 unsigned long nearestShapePoint(const std::vector<float>& shape, 0113 const std::vector<float>& pt); 0114 0115 // ------------------------------------------------------------------------------------ 0116 0117 /** 0118 * requires 0119 * - shape.size()%2 == 0 0120 * - shape.size() > 0 0121 * ensures 0122 * - #anchor_idx.size() == pixel_coordinates.size() 0123 * - #deltas.size() == pixel_coordinates.size() 0124 * - for all valid i: 0125 * - pixel_coordinates[i] == location(shape,#anchor_idx[i]) + #deltas[i] 0126 */ 0127 void createShapeRelativeEncoding(const std::vector<float>& shape, 0128 const std::vector<std::vector<float> >& pixel_coordinates, 0129 std::vector<unsigned long>& anchor_idx, 0130 std::vector<std::vector<float> >& deltas); 0131 0132 // ------------------------------------------------------------------------------------ 0133 0134 PointTransformAffine findTformBetweenShapes(const std::vector<float>& from_shape, 0135 const std::vector<float>& to_shape); 0136 0137 // ------------------------------------------------------------------------------------ 0138 0139 /** 0140 * Returns a transform that maps rect.tl_corner() to (0, 0) and rect.br_corner() to (1,1). 0141 */ 0142 PointTransformAffine normalizingTform(const cv::Rect& rect); 0143 0144 // ------------------------------------------------------------------------------------ 0145 0146 /** 0147 * returns a transform that maps (0, 0) to rect.tl_corner() and (1,1) to rect.br_corner(). 0148 */ 0149 PointTransformAffine unnormalizingTform(const cv::Rect& rect); 0150 bool pointContained(const cv::Rect& rect, const std::vector<float>& point); 0151 0152 // ------------------------------------------------------------------------------------ 0153 0154 /** 0155 * requires 0156 * - image_type == an image object that implements the interface defined in 0157 * dlib/image_processing/generic_image.h 0158 * - reference_pixel_anchor_idx.size() == reference_pixel_deltas.size() 0159 * - current_shape.size() == reference_shape.size() 0160 * - reference_shape.size()%2 == 0 0161 * - max(mat(reference_pixel_anchor_idx)) < reference_shape.size()/2 0162 * ensures 0163 * - #feature_pixel_values.size() == reference_pixel_deltas.size() 0164 * - for all valid i: 0165 * - #feature_pixel_values[i] == the value of the pixel in img_ that 0166 * corresponds to the pixel identified by reference_pixel_anchor_idx[i] 0167 * and reference_pixel_deltas[i] when the pixel is located relative to 0168 * current_shape rather than reference_shape. 0169 */ 0170 void extractFeaturePixelValues(const cv::Mat& img_, 0171 const cv::Rect& rect, 0172 const std::vector<float>& current_shape, 0173 const std::vector<float>& reference_shape, 0174 const std::vector<unsigned long>& reference_pixel_anchor_idx, 0175 const std::vector<std::vector<float> >& reference_pixel_deltas, 0176 std::vector<float>& feature_pixel_values); 0177 0178 // ------------------------------------------------------------------------------------ 0179 0180 class ShapePredictor 0181 { 0182 public: 0183 0184 explicit ShapePredictor(); 0185 0186 unsigned long num_parts() const; 0187 unsigned long num_features() const; 0188 0189 FullObjectDetection operator()(const cv::Mat& img, 0190 const cv::Rect& rect) const; 0191 0192 public: 0193 0194 std::vector<float> initial_shape; 0195 std::vector<std::vector<RedEye::RegressionTree> > forests; 0196 std::vector<std::vector<unsigned long> > anchor_idx; 0197 std::vector<std::vector<std::vector<float> > > deltas; 0198 }; 0199 0200 QDataStream& operator << (QDataStream& dataStream, const ShapePredictor& shape); 0201 QDataStream& operator >> (QDataStream& dataStream, ShapePredictor& shape); 0202 0203 } // namespace RedEye 0204 0205 } // namespace Digikam 0206 0207 #endif // DIGIKAM_SHAPE_PREDICTOR_H