File indexing completed on 2024-12-22 04:09:50
0001 ///////////////////////////////////////////////////////////////////////////// 0002 // einspline: a library for creating and evaluating B-splines // 0003 // Copyright (C) 2007 Kenneth P. Esler, Jr. // 0004 // // 0005 // This program is free software; you can redistribute it and/or modify // 0006 // it under the terms of the GNU General Public License as published by // 0007 // the Free Software Foundation; either version 2 of the License, or // 0008 // (at your option) any later version. // 0009 // // 0010 // This program is distributed in the hope that it will be useful, // 0011 // but WITHOUT ANY WARRANTY; without even the implied warranty of // 0012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // 0013 // GNU General Public License for more details. // 0014 // // 0015 // You should have received a copy of the GNU General Public License // 0016 // along with this program; if not, write to the Free Software // 0017 // Foundation, Inc., 51 Franklin Street, Fifth Floor, // 0018 // Boston, MA 02110-1301 USA // 0019 ///////////////////////////////////////////////////////////////////////////// 0020 0021 #ifndef MULTI_BSPLINE_EVAL_STD_S_H 0022 #define MULTI_BSPLINE_EVAL_STD_S_H 0023 0024 #include <math.h> 0025 #include <stdio.h> 0026 #include "multi_bspline_structs.h" 0027 0028 extern const float* restrict Af; 0029 extern const float* restrict dAf; 0030 extern const float* restrict d2Af; 0031 extern const float* restrict d3Af; 0032 0033 /************************************************************/ 0034 /* 1D double-precision, real evaulation functions */ 0035 /************************************************************/ 0036 inline void 0037 eval_multi_UBspline_1d_s (multi_UBspline_1d_s *spline, 0038 double x, 0039 float* restrict vals) 0040 { 0041 x -= spline->x_grid.start; 0042 float ux = x*spline->x_grid.delta_inv; 0043 float ipartx, tx; 0044 tx = modff (ux, &ipartx); int ix = (int) ipartx; 0045 0046 float tpx[4], a[4]; 0047 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0048 float* restrict coefs = spline->coefs; 0049 0050 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); 0051 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); 0052 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); 0053 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); 0054 0055 int xs = spline->x_stride; 0056 0057 for (int n=0; n<spline->num_splines; n++) 0058 vals[n] = 0.0; 0059 0060 for (int i=0; i<4; i++) { 0061 float* restrict coefs = spline->coefs + ((ix+i)*xs); 0062 for (int n=0; n<spline->num_splines; n++) 0063 vals[n] += a[i] * coefs[n]; 0064 } 0065 } 0066 0067 0068 0069 inline void 0070 eval_multi_UBspline_1d_s_vg (multi_UBspline_1d_s *spline, 0071 double x, 0072 float* restrict vals, 0073 float* restrict grads) 0074 { 0075 x -= spline->x_grid.start; 0076 float ux = x*spline->x_grid.delta_inv; 0077 float ipartx, tx; 0078 tx = modff (ux, &ipartx); int ix = (int) ipartx; 0079 0080 float tpx[4], a[4], da[4]; 0081 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0082 float* restrict coefs = spline->coefs; 0083 0084 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); 0085 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); 0086 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); 0087 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); 0088 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); 0089 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); 0090 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); 0091 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); 0092 0093 int xs = spline->x_stride; 0094 0095 for (int n=0; n<spline->num_splines; n++) { 0096 vals[n] = 0.0; 0097 grads[n] = 0.0; 0098 } 0099 0100 for (int i=0; i<4; i++) { 0101 float* restrict coefs = spline->coefs + ((ix+i)*xs); 0102 for (int n=0; n<spline->num_splines; n++) { 0103 vals[n] += a[i] * coefs[n]; 0104 grads[n] += da[i] * coefs[n]; 0105 } 0106 } 0107 0108 float dxInv = spline->x_grid.delta_inv; 0109 for (int n=0; n<spline->num_splines; n++) 0110 grads[n] *= dxInv; 0111 } 0112 0113 0114 inline void 0115 eval_multi_UBspline_1d_s_vgl (multi_UBspline_1d_s *spline, 0116 double x, 0117 float* restrict vals, 0118 float* restrict grads, 0119 float* restrict lapl) 0120 { 0121 x -= spline->x_grid.start; 0122 float ux = x*spline->x_grid.delta_inv; 0123 float ipartx, tx; 0124 tx = modff (ux, &ipartx); int ix = (int) ipartx; 0125 0126 float tpx[4], a[4], da[4], d2a[4]; 0127 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0128 float* restrict coefs = spline->coefs; 0129 0130 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); 0131 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); 0132 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); 0133 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); 0134 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); 0135 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); 0136 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); 0137 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); 0138 d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); 0139 d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); 0140 d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); 0141 d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); 0142 0143 int xs = spline->x_stride; 0144 0145 for (int n=0; n<spline->num_splines; n++) { 0146 vals[n] = 0.0; 0147 grads[n] = 0.0; 0148 lapl[n] = 0.0; 0149 } 0150 0151 for (int i=0; i<4; i++) { 0152 float* restrict coefs = spline->coefs + ((ix+i)*xs); 0153 for (int n=0; n<spline->num_splines; n++) { 0154 vals[n] += a[i] * coefs[n]; 0155 grads[n] += da[i] * coefs[n]; 0156 lapl[n] += d2a[i] * coefs[n]; 0157 } 0158 } 0159 0160 float dxInv = spline->x_grid.delta_inv; 0161 for (int n=0; n<spline->num_splines; n++) { 0162 grads[n] *= dxInv; 0163 lapl [n] *= dxInv*dxInv; 0164 } 0165 } 0166 0167 inline void 0168 eval_multi_UBspline_1d_s_vgh (multi_UBspline_1d_s *spline, 0169 double x, 0170 float* restrict vals, 0171 float* restrict grads, 0172 float* restrict hess) 0173 { 0174 eval_multi_UBspline_1d_s_vgl (spline, x, vals, grads, hess); 0175 } 0176 0177 0178 /************************************************************/ 0179 /* 2D double-precision, real evaulation functions */ 0180 /************************************************************/ 0181 inline void 0182 eval_multi_UBspline_2d_s (multi_UBspline_2d_s *spline, 0183 double x, double y, 0184 float* restrict vals) 0185 { 0186 x -= spline->x_grid.start; 0187 y -= spline->y_grid.start; 0188 float ux = x*spline->x_grid.delta_inv; 0189 float uy = y*spline->y_grid.delta_inv; 0190 float ipartx, iparty, tx, ty; 0191 tx = modff (ux, &ipartx); int ix = (int) ipartx; 0192 ty = modff (uy, &iparty); int iy = (int) iparty; 0193 0194 float tpx[4], tpy[4], a[4], b[4]; 0195 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0196 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0197 float* restrict coefs = spline->coefs; 0198 0199 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); 0200 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); 0201 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); 0202 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); 0203 0204 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); 0205 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); 0206 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); 0207 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); 0208 0209 int xs = spline->x_stride; 0210 int ys = spline->y_stride; 0211 0212 for (int n=0; n<spline->num_splines; n++) 0213 vals[n] = 0.0; 0214 0215 for (int i=0; i<4; i++) 0216 for (int j=0; j<4; j++) { 0217 float prefactor = a[i]*b[j]; 0218 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); 0219 for (int n=0; n<spline->num_splines; n++) 0220 vals[n] += prefactor*coefs[n]; 0221 } 0222 } 0223 0224 0225 inline void 0226 eval_multi_UBspline_2d_s_vg (multi_UBspline_2d_s *spline, 0227 double x, double y, 0228 float* restrict vals, 0229 float* restrict grads) 0230 { 0231 x -= spline->x_grid.start; 0232 y -= spline->y_grid.start; 0233 float ux = x*spline->x_grid.delta_inv; 0234 float uy = y*spline->y_grid.delta_inv; 0235 float ipartx, iparty, tx, ty; 0236 tx = modff (ux, &ipartx); int ix = (int) ipartx; 0237 ty = modff (uy, &iparty); int iy = (int) iparty; 0238 0239 float tpx[4], tpy[4], a[4], b[4], da[4], db[4]; 0240 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0241 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0242 float* restrict coefs = spline->coefs; 0243 0244 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); 0245 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); 0246 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); 0247 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); 0248 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); 0249 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); 0250 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); 0251 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); 0252 0253 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); 0254 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); 0255 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); 0256 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); 0257 db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); 0258 db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); 0259 db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); 0260 db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); 0261 0262 int xs = spline->x_stride; 0263 int ys = spline->y_stride; 0264 0265 for (int n=0; n<spline->num_splines; n++) { 0266 vals[n] = 0.0; 0267 grads[2*n+0] = grads[2*n+1] = grads[2*n+2] = 0.0; 0268 } 0269 0270 for (int i=0; i<4; i++) 0271 for (int j=0; j<4; j++) { 0272 float ab = a[i]*b[j]; 0273 float dab[2]; 0274 dab[0] = da[i]* b[j]; 0275 dab[1] = a[i]*db[j]; 0276 0277 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); 0278 for (int n=0; n<spline->num_splines; n++) { 0279 vals [n] += ab *coefs[n]; 0280 grads[2*n+0] += dab[0]*coefs[n]; 0281 grads[2*n+1] += dab[1]*coefs[n]; 0282 } 0283 } 0284 0285 float dxInv = spline->x_grid.delta_inv; 0286 float dyInv = spline->y_grid.delta_inv; 0287 for (int n=0; n<spline->num_splines; n++) { 0288 grads[2*n+0] *= dxInv; 0289 grads[2*n+1] *= dyInv; 0290 } 0291 } 0292 0293 inline void 0294 eval_multi_UBspline_2d_s_vgl (multi_UBspline_2d_s *spline, 0295 double x, double y, 0296 float* restrict vals, 0297 float* restrict grads, 0298 float* restrict lapl) 0299 { 0300 x -= spline->x_grid.start; 0301 y -= spline->y_grid.start; 0302 float ux = x*spline->x_grid.delta_inv; 0303 float uy = y*spline->y_grid.delta_inv; 0304 float ipartx, iparty, tx, ty; 0305 tx = modff (ux, &ipartx); int ix = (int) ipartx; 0306 ty = modff (uy, &iparty); int iy = (int) iparty; 0307 0308 float tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; 0309 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0310 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0311 float* restrict coefs = spline->coefs; 0312 0313 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); 0314 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); 0315 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); 0316 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); 0317 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); 0318 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); 0319 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); 0320 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); 0321 d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); 0322 d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); 0323 d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); 0324 d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); 0325 0326 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); 0327 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); 0328 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); 0329 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); 0330 db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); 0331 db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); 0332 db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); 0333 db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); 0334 d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); 0335 d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); 0336 d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); 0337 d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); 0338 0339 int xs = spline->x_stride; 0340 int ys = spline->y_stride; 0341 0342 float lapl2[2*spline->num_splines]; 0343 for (int n=0; n<spline->num_splines; n++) { 0344 vals[n] = 0.0; 0345 grads[2*n+0] = grads[2*n+1] = 0.0; 0346 lapl2[2*n+0] = lapl2[2*n+1] = 0.0; 0347 } 0348 0349 for (int i=0; i<4; i++) 0350 for (int j=0; j<4; j++) { 0351 float ab = a[i]*b[j]; 0352 float dab[2], d2ab[2]; 0353 dab[0] = da[i]* b[j]; 0354 dab[1] = a[i]*db[j]; 0355 d2ab[0] = d2a[i]* b[j]; 0356 d2ab[1] = a[i]*d2b[j]; 0357 0358 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); 0359 for (int n=0; n<spline->num_splines; n++) { 0360 vals[n] += ab *coefs[n]; 0361 grads[2*n+0] += dab[0]*coefs[n]; 0362 grads[2*n+1] += dab[1]*coefs[n]; 0363 lapl2[2*n+0] += d2ab[0]*coefs[n]; 0364 lapl2[2*n+1] += d2ab[1]*coefs[n]; 0365 } 0366 } 0367 0368 float dxInv = spline->x_grid.delta_inv; 0369 float dyInv = spline->y_grid.delta_inv; 0370 for (int n=0; n<spline->num_splines; n++) { 0371 grads[2*n+0] *= dxInv; 0372 grads[2*n+1] *= dyInv; 0373 lapl2[2*n+0] *= dxInv*dxInv; 0374 lapl2[2*n+1] *= dyInv*dyInv; 0375 lapl[n] = lapl2[2*n+0] + lapl2[2*n+1]; 0376 } 0377 } 0378 0379 0380 0381 0382 inline void 0383 eval_multi_UBspline_2d_s_vgh (multi_UBspline_2d_s *spline, 0384 double x, double y, 0385 float* restrict vals, 0386 float* restrict grads, 0387 float* restrict hess) 0388 { 0389 x -= spline->x_grid.start; 0390 y -= spline->y_grid.start; 0391 float ux = x*spline->x_grid.delta_inv; 0392 float uy = y*spline->y_grid.delta_inv; 0393 float ipartx, iparty, tx, ty; 0394 tx = modff (ux, &ipartx); int ix = (int) ipartx; 0395 ty = modff (uy, &iparty); int iy = (int) iparty; 0396 0397 float tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; 0398 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0399 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0400 float* restrict coefs = spline->coefs; 0401 0402 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); 0403 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); 0404 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); 0405 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); 0406 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); 0407 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); 0408 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); 0409 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); 0410 d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); 0411 d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); 0412 d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); 0413 d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); 0414 0415 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); 0416 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); 0417 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); 0418 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); 0419 db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); 0420 db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); 0421 db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); 0422 db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); 0423 d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); 0424 d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); 0425 d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); 0426 d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); 0427 0428 int xs = spline->x_stride; 0429 int ys = spline->y_stride; 0430 0431 for (int n=0; n<spline->num_splines; n++) { 0432 vals[n] = 0.0; 0433 grads[2*n+0] = grads[2*n+1] = 0.0; 0434 for (int i=0; i<4; i++) 0435 hess[4*n+i] = 0.0; 0436 } 0437 0438 for (int i=0; i<4; i++) 0439 for (int j=0; j<4; j++){ 0440 float ab = a[i]*b[j]; 0441 float dab[2], d2ab[3]; 0442 dab[0] = da[i]* b[j]; 0443 dab[1] = a[i]*db[j]; 0444 d2ab[0] = d2a[i] * b[j]; 0445 d2ab[1] = da[i] * db[j]; 0446 d2ab[2] = a[i] * d2b[j]; 0447 0448 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); 0449 for (int n=0; n<spline->num_splines; n++) { 0450 vals[n] += ab *coefs[n]; 0451 grads[2*n+0] += dab[0]*coefs[n]; 0452 grads[2*n+1] += dab[1]*coefs[n]; 0453 hess [4*n+0] += d2ab[0]*coefs[n]; 0454 hess [4*n+1] += d2ab[1]*coefs[n]; 0455 hess [4*n+3] += d2ab[2]*coefs[n]; 0456 } 0457 } 0458 0459 float dxInv = spline->x_grid.delta_inv; 0460 float dyInv = spline->y_grid.delta_inv; 0461 for (int n=0; n<spline->num_splines; n++) { 0462 grads[2*n+0] *= dxInv; 0463 grads[2*n+1] *= dyInv; 0464 hess[4*n+0] *= dxInv*dxInv; 0465 hess[4*n+1] *= dxInv*dyInv; 0466 hess[4*n+3] *= dyInv*dyInv; 0467 // Copy hessian elements into lower half of 3x3 matrix 0468 hess[4*n+2] = hess[4*n+1]; 0469 } 0470 } 0471 0472 0473 0474 0475 /************************************************************/ 0476 /* 3D double-precision, real evaulation functions */ 0477 /************************************************************/ 0478 inline void 0479 eval_multi_UBspline_3d_s (multi_UBspline_3d_s *spline, 0480 double x, double y, double z, 0481 float* restrict vals) 0482 { 0483 x -= spline->x_grid.start; 0484 y -= spline->y_grid.start; 0485 z -= spline->z_grid.start; 0486 float ux = x*spline->x_grid.delta_inv; 0487 float uy = y*spline->y_grid.delta_inv; 0488 float uz = z*spline->z_grid.delta_inv; 0489 float ipartx, iparty, ipartz, tx, ty, tz; 0490 tx = modff (ux, &ipartx); int ix = (int) ipartx; 0491 ty = modff (uy, &iparty); int iy = (int) iparty; 0492 tz = modff (uz, &ipartz); int iz = (int) ipartz; 0493 0494 float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4]; 0495 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0496 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0497 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; 0498 float* restrict coefs = spline->coefs; 0499 0500 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); 0501 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); 0502 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); 0503 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); 0504 0505 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); 0506 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); 0507 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); 0508 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); 0509 0510 c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); 0511 c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); 0512 c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); 0513 c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); 0514 0515 int xs = spline->x_stride; 0516 int ys = spline->y_stride; 0517 int zs = spline->z_stride; 0518 0519 for (int n=0; n<spline->num_splines; n++) 0520 vals[n] = 0.0; 0521 0522 for (int i=0; i<4; i++) 0523 for (int j=0; j<4; j++) 0524 for (int k=0; k<4; k++) { 0525 float abc = a[i]*b[j]*c[k]; 0526 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); 0527 for (int n=0; n<spline->num_splines; n++) 0528 vals[n] += abc*coefs[n]; 0529 } 0530 } 0531 0532 0533 inline void 0534 eval_multi_UBspline_3d_s_vg (multi_UBspline_3d_s *spline, 0535 double x, double y, double z, 0536 float* restrict vals, 0537 float* restrict grads) 0538 { 0539 x -= spline->x_grid.start; 0540 y -= spline->y_grid.start; 0541 z -= spline->z_grid.start; 0542 float ux = x*spline->x_grid.delta_inv; 0543 float uy = y*spline->y_grid.delta_inv; 0544 float uz = z*spline->z_grid.delta_inv; 0545 float ipartx, iparty, ipartz, tx, ty, tz; 0546 tx = modff (ux, &ipartx); int ix = (int) ipartx; 0547 ty = modff (uy, &iparty); int iy = (int) iparty; 0548 tz = modff (uz, &ipartz); int iz = (int) ipartz; 0549 0550 float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], 0551 da[4], db[4], dc[4]; 0552 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0553 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0554 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; 0555 float* restrict coefs = spline->coefs; 0556 0557 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); 0558 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); 0559 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); 0560 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); 0561 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); 0562 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); 0563 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); 0564 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); 0565 0566 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); 0567 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); 0568 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); 0569 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); 0570 db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); 0571 db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); 0572 db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); 0573 db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); 0574 0575 c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); 0576 c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); 0577 c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); 0578 c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); 0579 dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); 0580 dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); 0581 dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); 0582 dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); 0583 0584 int xs = spline->x_stride; 0585 int ys = spline->y_stride; 0586 int zs = spline->z_stride; 0587 0588 for (int n=0; n<spline->num_splines; n++) { 0589 vals[n] = 0.0; 0590 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; 0591 } 0592 0593 for (int i=0; i<4; i++) 0594 for (int j=0; j<4; j++) 0595 for (int k=0; k<4; k++) { 0596 float abc = a[i]*b[j]*c[k]; 0597 float dabc[3]; 0598 dabc[0] = da[i]* b[j]* c[k]; 0599 dabc[1] = a[i]*db[j]* c[k]; 0600 dabc[2] = a[i]* b[j]*dc[k]; 0601 0602 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); 0603 for (int n=0; n<spline->num_splines; n++) { 0604 vals[n] += abc *coefs[n]; 0605 grads[3*n+0] += dabc[0]*coefs[n]; 0606 grads[3*n+1] += dabc[1]*coefs[n]; 0607 grads[3*n+2] += dabc[2]*coefs[n]; 0608 } 0609 } 0610 0611 float dxInv = spline->x_grid.delta_inv; 0612 float dyInv = spline->y_grid.delta_inv; 0613 float dzInv = spline->z_grid.delta_inv; 0614 for (int n=0; n<spline->num_splines; n++) { 0615 grads[3*n+0] *= dxInv; 0616 grads[3*n+1] *= dyInv; 0617 grads[3*n+2] *= dzInv; 0618 } 0619 } 0620 0621 0622 0623 inline void 0624 eval_multi_UBspline_3d_s_vgl (multi_UBspline_3d_s *spline, 0625 double x, double y, double z, 0626 float* restrict vals, 0627 float* restrict grads, 0628 float* restrict lapl) 0629 { 0630 x -= spline->x_grid.start; 0631 y -= spline->y_grid.start; 0632 z -= spline->z_grid.start; 0633 float ux = x*spline->x_grid.delta_inv; 0634 float uy = y*spline->y_grid.delta_inv; 0635 float uz = z*spline->z_grid.delta_inv; 0636 float ipartx, iparty, ipartz, tx, ty, tz; 0637 tx = modff (ux, &ipartx); int ix = (int) ipartx; 0638 ty = modff (uy, &iparty); int iy = (int) iparty; 0639 tz = modff (uz, &ipartz); int iz = (int) ipartz; 0640 0641 float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], 0642 da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; 0643 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0644 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0645 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; 0646 float* restrict coefs = spline->coefs; 0647 0648 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); 0649 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); 0650 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); 0651 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); 0652 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); 0653 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); 0654 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); 0655 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); 0656 d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); 0657 d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); 0658 d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); 0659 d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); 0660 0661 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); 0662 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); 0663 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); 0664 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); 0665 db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); 0666 db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); 0667 db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); 0668 db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); 0669 d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); 0670 d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); 0671 d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); 0672 d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); 0673 0674 c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); 0675 c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); 0676 c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); 0677 c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); 0678 dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); 0679 dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); 0680 dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); 0681 dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); 0682 d2c[0] = (d2Af[ 0]*tpz[0] + d2Af[ 1]*tpz[1] + d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]); 0683 d2c[1] = (d2Af[ 4]*tpz[0] + d2Af[ 5]*tpz[1] + d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]); 0684 d2c[2] = (d2Af[ 8]*tpz[0] + d2Af[ 9]*tpz[1] + d2Af[10]*tpz[2] + d2Af[11]*tpz[3]); 0685 d2c[3] = (d2Af[12]*tpz[0] + d2Af[13]*tpz[1] + d2Af[14]*tpz[2] + d2Af[15]*tpz[3]); 0686 0687 int xs = spline->x_stride; 0688 int ys = spline->y_stride; 0689 int zs = spline->z_stride; 0690 0691 float lapl3[3*spline->num_splines]; 0692 for (int n=0; n<spline->num_splines; n++) { 0693 vals[n] = 0.0; 0694 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; 0695 lapl3[3*n+0] = lapl3[3*n+1] = lapl3[3*n+2] = 0.0; 0696 } 0697 0698 for (int i=0; i<4; i++) 0699 for (int j=0; j<4; j++) 0700 for (int k=0; k<4; k++) { 0701 float abc = a[i]*b[j]*c[k]; 0702 float dabc[3], d2abc[3]; 0703 dabc[0] = da[i]* b[j]* c[k]; 0704 dabc[1] = a[i]*db[j]* c[k]; 0705 dabc[2] = a[i]* b[j]*dc[k]; 0706 d2abc[0] = d2a[i]* b[j]* c[k]; 0707 d2abc[1] = a[i]*d2b[j]* c[k]; 0708 d2abc[2] = a[i]* b[j]*d2c[k]; 0709 0710 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); 0711 for (int n=0; n<spline->num_splines; n++) { 0712 vals[n] += abc *coefs[n]; 0713 grads[3*n+0] += dabc[0]*coefs[n]; 0714 grads[3*n+1] += dabc[1]*coefs[n]; 0715 grads[3*n+2] += dabc[2]*coefs[n]; 0716 lapl3[3*n+0] += d2abc[0]*coefs[n]; 0717 lapl3[3*n+1] += d2abc[1]*coefs[n]; 0718 lapl3[3*n+2] += d2abc[2]*coefs[n]; 0719 } 0720 } 0721 0722 float dxInv = spline->x_grid.delta_inv; 0723 float dyInv = spline->y_grid.delta_inv; 0724 float dzInv = spline->z_grid.delta_inv; 0725 for (int n=0; n<spline->num_splines; n++) { 0726 grads[3*n+0] *= dxInv; 0727 grads[3*n+1] *= dyInv; 0728 grads[3*n+2] *= dzInv; 0729 lapl3[3*n+0] *= dxInv*dxInv; 0730 lapl3[3*n+1] *= dyInv*dyInv; 0731 lapl3[3*n+2] *= dzInv*dzInv; 0732 lapl[n] = lapl3[3*n+0] + lapl3[3*n+1] + lapl3[3*n+2]; 0733 } 0734 } 0735 0736 0737 0738 inline void 0739 eval_multi_UBspline_3d_s_vgh (multi_UBspline_3d_s *spline, 0740 double x, double y, double z, 0741 float* restrict vals, 0742 float* restrict grads, 0743 float* restrict hess) 0744 { 0745 x -= spline->x_grid.start; 0746 y -= spline->y_grid.start; 0747 z -= spline->z_grid.start; 0748 float ux = x*spline->x_grid.delta_inv; 0749 float uy = y*spline->y_grid.delta_inv; 0750 float uz = z*spline->z_grid.delta_inv; 0751 float ipartx, iparty, ipartz, tx, ty, tz; 0752 tx = modff (ux, &ipartx); int ix = (int) ipartx; 0753 ty = modff (uy, &iparty); int iy = (int) iparty; 0754 tz = modff (uz, &ipartz); int iz = (int) ipartz; 0755 0756 float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], 0757 da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; 0758 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0759 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0760 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; 0761 float* restrict coefs = spline->coefs; 0762 0763 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); 0764 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); 0765 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); 0766 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); 0767 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); 0768 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); 0769 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); 0770 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); 0771 d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); 0772 d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); 0773 d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); 0774 d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); 0775 0776 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); 0777 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); 0778 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); 0779 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); 0780 db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); 0781 db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); 0782 db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); 0783 db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); 0784 d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); 0785 d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); 0786 d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); 0787 d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); 0788 0789 c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); 0790 c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); 0791 c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); 0792 c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); 0793 dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); 0794 dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); 0795 dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); 0796 dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); 0797 d2c[0] = (d2Af[ 0]*tpz[0] + d2Af[ 1]*tpz[1] + d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]); 0798 d2c[1] = (d2Af[ 4]*tpz[0] + d2Af[ 5]*tpz[1] + d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]); 0799 d2c[2] = (d2Af[ 8]*tpz[0] + d2Af[ 9]*tpz[1] + d2Af[10]*tpz[2] + d2Af[11]*tpz[3]); 0800 d2c[3] = (d2Af[12]*tpz[0] + d2Af[13]*tpz[1] + d2Af[14]*tpz[2] + d2Af[15]*tpz[3]); 0801 0802 int xs = spline->x_stride; 0803 int ys = spline->y_stride; 0804 int zs = spline->z_stride; 0805 0806 for (int n=0; n<spline->num_splines; n++) { 0807 vals[n] = 0.0; 0808 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; 0809 for (int i=0; i<9; i++) 0810 hess[9*n+i] = 0.0; 0811 } 0812 0813 for (int i=0; i<4; i++) 0814 for (int j=0; j<4; j++) 0815 for (int k=0; k<4; k++) { 0816 float abc = a[i]*b[j]*c[k]; 0817 float dabc[3], d2abc[6]; 0818 dabc[0] = da[i]* b[j]* c[k]; 0819 dabc[1] = a[i]*db[j]* c[k]; 0820 dabc[2] = a[i]* b[j]*dc[k]; 0821 d2abc[0] = d2a[i]* b[j]* c[k]; 0822 d2abc[1] = da[i]* db[j]* c[k]; 0823 d2abc[2] = da[i]* b[j]* dc[k]; 0824 d2abc[3] = a[i]*d2b[j]* c[k]; 0825 d2abc[4] = a[i]* db[j]* dc[k]; 0826 d2abc[5] = a[i]* b[j]*d2c[k]; 0827 0828 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); 0829 for (int n=0; n<spline->num_splines; n++) { 0830 vals[n] += abc *coefs[n]; 0831 grads[3*n+0] += dabc[0]*coefs[n]; 0832 grads[3*n+1] += dabc[1]*coefs[n]; 0833 grads[3*n+2] += dabc[2]*coefs[n]; 0834 hess [9*n+0] += d2abc[0]*coefs[n]; 0835 hess [9*n+1] += d2abc[1]*coefs[n]; 0836 hess [9*n+2] += d2abc[2]*coefs[n]; 0837 hess [9*n+4] += d2abc[3]*coefs[n]; 0838 hess [9*n+5] += d2abc[4]*coefs[n]; 0839 hess [9*n+8] += d2abc[5]*coefs[n]; 0840 } 0841 } 0842 0843 float dxInv = spline->x_grid.delta_inv; 0844 float dyInv = spline->y_grid.delta_inv; 0845 float dzInv = spline->z_grid.delta_inv; 0846 for (int n=0; n<spline->num_splines; n++) { 0847 grads[3*n+0] *= dxInv; 0848 grads[3*n+1] *= dyInv; 0849 grads[3*n+2] *= dzInv; 0850 hess [9*n+0] *= dxInv*dxInv; 0851 hess [9*n+4] *= dyInv*dyInv; 0852 hess [9*n+8] *= dzInv*dzInv; 0853 hess [9*n+1] *= dxInv*dyInv; 0854 hess [9*n+2] *= dxInv*dzInv; 0855 hess [9*n+5] *= dyInv*dzInv; 0856 // Copy hessian elements into lower half of 3x3 matrix 0857 hess [9*n+3] = hess[9*n+1]; 0858 hess [9*n+6] = hess[9*n+2]; 0859 hess [9*n+7] = hess[9*n+5]; 0860 } 0861 } 0862 0863 inline void 0864 eval_multi_UBspline_3d_s_vghgh (multi_UBspline_3d_s *spline, 0865 double x, double y, double z, 0866 float* restrict vals, 0867 float* restrict grads, 0868 float* restrict hess, 0869 float* restrict gradhess) 0870 { 0871 x -= spline->x_grid.start; 0872 y -= spline->y_grid.start; 0873 z -= spline->z_grid.start; 0874 float ux = x*spline->x_grid.delta_inv; 0875 float uy = y*spline->y_grid.delta_inv; 0876 float uz = z*spline->z_grid.delta_inv; 0877 float ipartx, iparty, ipartz, tx, ty, tz; 0878 tx = modff (ux, &ipartx); int ix = (int) ipartx; 0879 ty = modff (uy, &iparty); int iy = (int) iparty; 0880 tz = modff (uz, &ipartz); int iz = (int) ipartz; 0881 0882 float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], 0883 da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4], 0884 d3a[4], d3b[4], d3c[4]; 0885 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0886 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0887 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; 0888 float* restrict coefs = spline->coefs; 0889 0890 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); 0891 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); 0892 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); 0893 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); 0894 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); 0895 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); 0896 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); 0897 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); 0898 d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); 0899 d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); 0900 d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); 0901 d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); 0902 d3a[0] = (/*d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] +*/ d3Af[ 3]*tpx[3]); 0903 d3a[1] = (/*d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] +*/ d3Af[ 7]*tpx[3]); 0904 d3a[2] = (/*d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] +*/ d3Af[11]*tpx[3]); 0905 d3a[3] = (/*d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] +*/ d3Af[15]*tpx[3]); 0906 0907 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); 0908 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); 0909 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); 0910 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); 0911 db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); 0912 db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); 0913 db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); 0914 db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); 0915 d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); 0916 d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); 0917 d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); 0918 d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); 0919 d3b[0] = (/*d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] +*/ d3Af[ 3]*tpy[3]); 0920 d3b[1] = (/*d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] +*/ d3Af[ 7]*tpy[3]); 0921 d3b[2] = (/*d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] +*/ d3Af[11]*tpy[3]); 0922 d3b[3] = (/*d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] +*/ d3Af[15]*tpy[3]); 0923 0924 c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); 0925 c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); 0926 c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); 0927 c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); 0928 dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); 0929 dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); 0930 dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); 0931 dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); 0932 d2c[0] = (d2Af[ 0]*tpz[0] + d2Af[ 1]*tpz[1] + d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]); 0933 d2c[1] = (d2Af[ 4]*tpz[0] + d2Af[ 5]*tpz[1] + d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]); 0934 d2c[2] = (d2Af[ 8]*tpz[0] + d2Af[ 9]*tpz[1] + d2Af[10]*tpz[2] + d2Af[11]*tpz[3]); 0935 d2c[3] = (d2Af[12]*tpz[0] + d2Af[13]*tpz[1] + d2Af[14]*tpz[2] + d2Af[15]*tpz[3]); 0936 d3c[0] = (/*d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] +*/ d3Af[ 3]*tpz[3]); 0937 d3c[1] = (/*d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] +*/ d3Af[ 7]*tpz[3]); 0938 d3c[2] = (/*d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] +*/ d3Af[11]*tpz[3]); 0939 d3c[3] = (/*d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] +*/ d3Af[15]*tpz[3]); 0940 0941 int xs = spline->x_stride; 0942 int ys = spline->y_stride; 0943 int zs = spline->z_stride; 0944 0945 for (int n=0; n<spline->num_splines; n++) { 0946 vals[n] = 0.0; 0947 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; 0948 for (int i=0; i<9; i++) 0949 hess[9*n+i] = 0.0; 0950 for (int i=0; i<27; i++) 0951 gradhess[27*n+i] = 0.0; 0952 } 0953 0954 for (int i=0; i<4; i++) 0955 for (int j=0; j<4; j++) 0956 for (int k=0; k<4; k++) { 0957 float abc = a[i]*b[j]*c[k]; 0958 float dabc[3], d2abc[6], d3abc[10]; 0959 dabc[0] = da[i]* b[j]* c[k]; 0960 dabc[1] = a[i]*db[j]* c[k]; 0961 dabc[2] = a[i]* b[j]*dc[k]; 0962 d2abc[0] = d2a[i]* b[j]* c[k]; 0963 d2abc[1] = da[i]* db[j]* c[k]; 0964 d2abc[2] = da[i]* b[j]* dc[k]; 0965 d2abc[3] = a[i]*d2b[j]* c[k]; 0966 d2abc[4] = a[i]* db[j]* dc[k]; 0967 d2abc[5] = a[i]* b[j]*d2c[k]; 0968 0969 d3abc[0] = d3a[i]* b[j]* c[k]; 0970 d3abc[1] = d2a[i]* db[j]* c[k]; 0971 d3abc[2] = d2a[i]* b[j]* dc[k]; 0972 d3abc[3] = da[i]*d2b[j]* c[k]; 0973 d3abc[4] = da[i]* db[j]* dc[k]; 0974 d3abc[5] = da[i]* b[j]*d2c[k]; 0975 d3abc[6] = a[i]*d3b[j]* c[k]; 0976 d3abc[7] = a[i]*d2b[j]* dc[k]; 0977 d3abc[8] = a[i]* db[j]*d2c[k]; 0978 d3abc[9] = a[i]* b[j]*d3c[k]; 0979 0980 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); 0981 for (int n=0; n<spline->num_splines; n++) { 0982 vals[n] += abc *coefs[n]; 0983 grads[3*n+0] += dabc[0]*coefs[n]; 0984 grads[3*n+1] += dabc[1]*coefs[n]; 0985 grads[3*n+2] += dabc[2]*coefs[n]; 0986 hess [9*n+0] += d2abc[0]*coefs[n]; 0987 hess [9*n+1] += d2abc[1]*coefs[n]; 0988 hess [9*n+2] += d2abc[2]*coefs[n]; 0989 hess [9*n+4] += d2abc[3]*coefs[n]; 0990 hess [9*n+5] += d2abc[4]*coefs[n]; 0991 hess [9*n+8] += d2abc[5]*coefs[n]; 0992 0993 gradhess [27*n+0 ] += d3abc[0]*coefs[n]; 0994 gradhess [27*n+1 ] += d3abc[1]*coefs[n]; 0995 gradhess [27*n+2 ] += d3abc[2]*coefs[n]; 0996 gradhess [27*n+4 ] += d3abc[3]*coefs[n]; 0997 gradhess [27*n+5 ] += d3abc[4]*coefs[n]; 0998 gradhess [27*n+8 ] += d3abc[5]*coefs[n]; 0999 gradhess [27*n+13] += d3abc[6]*coefs[n]; 1000 gradhess [27*n+14] += d3abc[7]*coefs[n]; 1001 gradhess [27*n+17] += d3abc[8]*coefs[n]; 1002 gradhess [27*n+26] += d3abc[9]*coefs[n]; 1003 } 1004 } 1005 1006 float dxInv = spline->x_grid.delta_inv; 1007 float dyInv = spline->y_grid.delta_inv; 1008 float dzInv = spline->z_grid.delta_inv; 1009 for (int n=0; n<spline->num_splines; n++) { 1010 grads[3*n+0] *= dxInv; 1011 grads[3*n+1] *= dyInv; 1012 grads[3*n+2] *= dzInv; 1013 hess [9*n+0] *= dxInv*dxInv; 1014 hess [9*n+4] *= dyInv*dyInv; 1015 hess [9*n+8] *= dzInv*dzInv; 1016 hess [9*n+1] *= dxInv*dyInv; 1017 hess [9*n+2] *= dxInv*dzInv; 1018 hess [9*n+5] *= dyInv*dzInv; 1019 // Copy hessian elements into lower half of 3x3 matrix 1020 hess [9*n+3] = hess[9*n+1]; 1021 hess [9*n+6] = hess[9*n+2]; 1022 hess [9*n+7] = hess[9*n+5]; 1023 1024 gradhess [27*n+0 ] *= dxInv*dxInv*dxInv; 1025 gradhess [27*n+1 ] *= dxInv*dxInv*dyInv; 1026 gradhess [27*n+2 ] *= dxInv*dxInv*dzInv; 1027 gradhess [27*n+4 ] *= dxInv*dyInv*dyInv; 1028 gradhess [27*n+5 ] *= dxInv*dyInv*dzInv; 1029 gradhess [27*n+8 ] *= dxInv*dzInv*dzInv; 1030 gradhess [27*n+13] *= dyInv*dyInv*dyInv; 1031 gradhess [27*n+14] *= dyInv*dyInv*dzInv; 1032 gradhess [27*n+17] *= dyInv*dzInv*dzInv; 1033 gradhess [27*n+26] *= dzInv*dzInv*dzInv; 1034 1035 // Copy gradhess elements into rest of tensor 1036 gradhess [27*n+9 ] = gradhess [27*n+3 ] = gradhess [27*n+1 ]; 1037 gradhess [27*n+18 ] = gradhess [27*n+6 ] = gradhess [27*n+2 ]; 1038 gradhess [27*n+22 ] = gradhess [27*n+16 ] = gradhess [27*n+14]; 1039 gradhess [27*n+12 ] = gradhess [27*n+10 ] = gradhess [27*n+4 ]; 1040 gradhess [27*n+24 ] = gradhess [27*n+20 ] = gradhess [27*n+8 ]; 1041 gradhess [27*n+25 ] = gradhess [27*n+23 ] = gradhess [27*n+17]; 1042 gradhess [27*n+21 ] = gradhess [27*n+19 ] = gradhess [27*n+15] = gradhess [27*n+11 ] = gradhess [27*n+7 ] = gradhess [27*n+5]; 1043 1044 } 1045 } 1046 1047 #endif