File indexing completed on 2024-12-22 04:09:51
0001 ///////////////////////////////////////////////////////////////////////////// 0002 // einspline: a library for creating and evaluating B-splines // 0003 // Copyright (C) 2007 Kenneth P. Esler, Jr. // 0004 // // 0005 // This program is free software; you can redistribute it and/or modify // 0006 // it under the terms of the GNU General Public License as published by // 0007 // the Free Software Foundation; either version 2 of the License, or // 0008 // (at your option) any later version. // 0009 // // 0010 // This program is distributed in the hope that it will be useful, // 0011 // but WITHOUT ANY WARRANTY; without even the implied warranty of // 0012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // 0013 // GNU General Public License for more details. // 0014 // // 0015 // You should have received a copy of the GNU General Public License // 0016 // along with this program; if not, write to the Free Software // 0017 // Foundation, Inc., 51 Franklin Street, Fifth Floor, // 0018 // Boston, MA 02110-1301 USA // 0019 ///////////////////////////////////////////////////////////////////////////// 0020 0021 #ifndef MULTI_BSPLINE_EVAL_STD_Z_IMPL_H 0022 #define MULTI_BSPLINE_EVAL_STD_Z_IMPL_H 0023 0024 #include <math.h> 0025 #include <stdio.h> 0026 #include "bspline_base.h" 0027 #include "multi_bspline_structs.h" 0028 0029 extern const double* restrict Ad; 0030 extern const double* restrict dAd; 0031 extern const double* restrict d2Ad; 0032 extern const double* restrict d3Ad; 0033 0034 /************************************************************/ 0035 /* 1D double-precision, complex evaulation functions */ 0036 /************************************************************/ 0037 void 0038 eval_multi_UBspline_1d_z (multi_UBspline_1d_z *spline, 0039 double x, 0040 complex_double* restrict vals) 0041 { 0042 x -= spline->x_grid.start; 0043 double ux = x*spline->x_grid.delta_inv; 0044 double ipartx, tx; 0045 tx = modf (ux, &ipartx); int ix = (int) ipartx; 0046 0047 double tpx[4], a[4]; 0048 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0049 complex_double* restrict coefs = spline->coefs; 0050 0051 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); 0052 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); 0053 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); 0054 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); 0055 0056 intptr_t xs = spline->x_stride; 0057 0058 for (int n=0; n<spline->num_splines; n++) 0059 vals[n] = 0.0; 0060 0061 for (int i=0; i<4; i++) { 0062 complex_double* restrict coefs = spline->coefs + ((ix+i)*xs); 0063 for (int n=0; n<spline->num_splines; n++) 0064 vals[n] += a[i] * coefs[n]; 0065 } 0066 } 0067 0068 0069 0070 void 0071 eval_multi_UBspline_1d_z_vg (multi_UBspline_1d_z *spline, 0072 double x, 0073 complex_double* restrict vals, 0074 complex_double* restrict grads) 0075 { 0076 x -= spline->x_grid.start; 0077 double ux = x*spline->x_grid.delta_inv; 0078 double ipartx, tx; 0079 tx = modf (ux, &ipartx); int ix = (int) ipartx; 0080 0081 double tpx[4], a[4], da[4]; 0082 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0083 complex_double* restrict coefs = spline->coefs; 0084 0085 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); 0086 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); 0087 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); 0088 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); 0089 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); 0090 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); 0091 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); 0092 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); 0093 0094 intptr_t xs = spline->x_stride; 0095 0096 for (int n=0; n<spline->num_splines; n++) { 0097 vals[n] = 0.0; 0098 grads[n] = 0.0; 0099 } 0100 0101 for (int i=0; i<4; i++) { 0102 complex_double* restrict coefs = spline->coefs + ((ix+i)*xs); 0103 for (int n=0; n<spline->num_splines; n++) { 0104 vals[n] += a[i] * coefs[n]; 0105 grads[n] += da[i] * coefs[n]; 0106 } 0107 } 0108 0109 double dxInv = spline->x_grid.delta_inv; 0110 for (int n=0; n<spline->num_splines; n++) 0111 grads[n] *= dxInv; 0112 } 0113 0114 0115 void 0116 eval_multi_UBspline_1d_z_vgl (multi_UBspline_1d_z *spline, 0117 double x, 0118 complex_double* restrict vals, 0119 complex_double* restrict grads, 0120 complex_double* restrict lapl) 0121 { 0122 x -= spline->x_grid.start; 0123 double ux = x*spline->x_grid.delta_inv; 0124 double ipartx, tx; 0125 tx = modf (ux, &ipartx); int ix = (int) ipartx; 0126 0127 double tpx[4], a[4], da[4], d2a[4]; 0128 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0129 complex_double* restrict coefs = spline->coefs; 0130 0131 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); 0132 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); 0133 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); 0134 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); 0135 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); 0136 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); 0137 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); 0138 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); 0139 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); 0140 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); 0141 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); 0142 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); 0143 0144 intptr_t xs = spline->x_stride; 0145 0146 for (int n=0; n<spline->num_splines; n++) { 0147 vals[n] = 0.0; 0148 grads[n] = 0.0; 0149 lapl[n] = 0.0; 0150 } 0151 0152 for (int i=0; i<4; i++) { 0153 complex_double* restrict coefs = spline->coefs + ((ix+i)*xs); 0154 for (int n=0; n<spline->num_splines; n++) { 0155 vals[n] += a[i] * coefs[n]; 0156 grads[n] += da[i] * coefs[n]; 0157 lapl[n] += d2a[i] * coefs[n]; 0158 } 0159 } 0160 0161 double dxInv = spline->x_grid.delta_inv; 0162 for (int n=0; n<spline->num_splines; n++) { 0163 grads[n] *= dxInv; 0164 lapl [n] *= dxInv*dxInv; 0165 } 0166 } 0167 0168 0169 void 0170 eval_multi_UBspline_1d_z_vgh (multi_UBspline_1d_z *spline, 0171 double x, 0172 complex_double* restrict vals, 0173 complex_double* restrict grads, 0174 complex_double* restrict hess) 0175 { 0176 eval_multi_UBspline_1d_z_vgl (spline, x, vals, grads, hess); 0177 } 0178 0179 0180 /************************************************************/ 0181 /* 2D double-precision, complex evaulation functions */ 0182 /************************************************************/ 0183 void 0184 eval_multi_UBspline_2d_z (multi_UBspline_2d_z *spline, 0185 double x, double y, 0186 complex_double* restrict vals) 0187 { 0188 x -= spline->x_grid.start; 0189 y -= spline->y_grid.start; 0190 double ux = x*spline->x_grid.delta_inv; 0191 double uy = y*spline->y_grid.delta_inv; 0192 double ipartx, iparty, tx, ty; 0193 tx = modf (ux, &ipartx); int ix = (int) ipartx; 0194 ty = modf (uy, &iparty); int iy = (int) iparty; 0195 0196 double tpx[4], tpy[4], a[4], b[4]; 0197 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0198 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0199 complex_double* restrict coefs = spline->coefs; 0200 0201 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); 0202 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); 0203 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); 0204 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); 0205 0206 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); 0207 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); 0208 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); 0209 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); 0210 0211 intptr_t xs = spline->x_stride; 0212 intptr_t ys = spline->y_stride; 0213 0214 for (int n=0; n<spline->num_splines; n++) 0215 vals[n] = 0.0; 0216 0217 for (int i=0; i<4; i++) 0218 for (int j=0; j<4; j++) { 0219 double prefactor = a[i]*b[j]; 0220 complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); 0221 for (int n=0; n<spline->num_splines; n++) 0222 vals[n] += prefactor*coefs[n]; 0223 } 0224 } 0225 0226 0227 void 0228 eval_multi_UBspline_2d_z_vg (multi_UBspline_2d_z *spline, 0229 double x, double y, 0230 complex_double* restrict vals, 0231 complex_double* restrict grads) 0232 { 0233 x -= spline->x_grid.start; 0234 y -= spline->y_grid.start; 0235 double ux = x*spline->x_grid.delta_inv; 0236 double uy = y*spline->y_grid.delta_inv; 0237 double ipartx, iparty, tx, ty; 0238 tx = modf (ux, &ipartx); int ix = (int) ipartx; 0239 ty = modf (uy, &iparty); int iy = (int) iparty; 0240 0241 double tpx[4], tpy[4], a[4], b[4], da[4], db[4]; 0242 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0243 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0244 complex_double* restrict coefs = spline->coefs; 0245 0246 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); 0247 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); 0248 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); 0249 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); 0250 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); 0251 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); 0252 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); 0253 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); 0254 0255 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); 0256 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); 0257 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); 0258 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); 0259 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); 0260 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); 0261 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); 0262 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); 0263 0264 intptr_t xs = spline->x_stride; 0265 intptr_t ys = spline->y_stride; 0266 0267 for (int n=0; n<spline->num_splines; n++) { 0268 vals[n] = 0.0; 0269 grads[2*n+0] = grads[2*n+1] = grads[2*n+2] = 0.0; 0270 } 0271 0272 for (int i=0; i<4; i++) 0273 for (int j=0; j<4; j++) { 0274 double ab = a[i]*b[j]; 0275 double dab[2]; 0276 dab[0] = da[i]* b[j]; 0277 dab[1] = a[i]*db[j]; 0278 0279 complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); 0280 for (int n=0; n<spline->num_splines; n++) { 0281 vals [n] += ab *coefs[n]; 0282 grads[2*n+0] += dab[0]*coefs[n]; 0283 grads[2*n+1] += dab[1]*coefs[n]; 0284 } 0285 } 0286 0287 double dxInv = spline->x_grid.delta_inv; 0288 double dyInv = spline->y_grid.delta_inv; 0289 for (int n=0; n<spline->num_splines; n++) { 0290 grads[2*n+0] *= dxInv; 0291 grads[2*n+1] *= dyInv; 0292 } 0293 } 0294 0295 void 0296 eval_multi_UBspline_2d_z_vgl (multi_UBspline_2d_z *spline, 0297 double x, double y, 0298 complex_double* restrict vals, 0299 complex_double* restrict grads, 0300 complex_double* restrict lapl) 0301 { 0302 x -= spline->x_grid.start; 0303 y -= spline->y_grid.start; 0304 double ux = x*spline->x_grid.delta_inv; 0305 double uy = y*spline->y_grid.delta_inv; 0306 double ipartx, iparty, tx, ty; 0307 tx = modf (ux, &ipartx); int ix = (int) ipartx; 0308 ty = modf (uy, &iparty); int iy = (int) iparty; 0309 0310 double tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; 0311 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0312 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0313 complex_double* restrict coefs = spline->coefs; 0314 0315 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); 0316 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); 0317 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); 0318 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); 0319 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); 0320 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); 0321 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); 0322 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); 0323 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); 0324 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); 0325 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); 0326 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); 0327 0328 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); 0329 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); 0330 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); 0331 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); 0332 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); 0333 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); 0334 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); 0335 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); 0336 d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); 0337 d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); 0338 d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); 0339 d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); 0340 0341 intptr_t xs = spline->x_stride; 0342 intptr_t ys = spline->y_stride; 0343 0344 //complex_double lapl2[2*spline->num_splines]; 0345 complex_double* restrict lapl2 = spline->lapl2; 0346 for (int n=0; n<spline->num_splines; n++) { 0347 vals[n] = 0.0; 0348 grads[2*n+0] = grads[2*n+1] = 0.0; 0349 lapl2[2*n+0] = lapl2[2*n+1] = 0.0; 0350 } 0351 0352 for (int i=0; i<4; i++) 0353 for (int j=0; j<4; j++) { 0354 double ab = a[i]*b[j]; 0355 double dab[2], d2ab[2]; 0356 dab[0] = da[i]* b[j]; 0357 dab[1] = a[i]*db[j]; 0358 d2ab[0] = d2a[i]* b[j]; 0359 d2ab[1] = a[i]*d2b[j]; 0360 0361 complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); 0362 for (int n=0; n<spline->num_splines; n++) { 0363 vals[n] += ab *coefs[n]; 0364 grads[2*n+0] += dab[0]*coefs[n]; 0365 grads[2*n+1] += dab[1]*coefs[n]; 0366 lapl2[2*n+0] += d2ab[0]*coefs[n]; 0367 lapl2[2*n+1] += d2ab[1]*coefs[n]; 0368 } 0369 } 0370 0371 double dxInv = spline->x_grid.delta_inv; 0372 double dyInv = spline->y_grid.delta_inv; 0373 for (int n=0; n<spline->num_splines; n++) { 0374 grads[2*n+0] *= dxInv; 0375 grads[2*n+1] *= dyInv; 0376 lapl2[2*n+0] *= dxInv*dxInv; 0377 lapl2[2*n+1] *= dyInv*dyInv; 0378 lapl[n] = lapl2[2*n+0] + lapl2[2*n+1]; 0379 } 0380 } 0381 0382 0383 0384 0385 void 0386 eval_multi_UBspline_2d_z_vgh (multi_UBspline_2d_z *spline, 0387 double x, double y, 0388 complex_double* restrict vals, 0389 complex_double* restrict grads, 0390 complex_double* restrict hess) 0391 { 0392 x -= spline->x_grid.start; 0393 y -= spline->y_grid.start; 0394 double ux = x*spline->x_grid.delta_inv; 0395 double uy = y*spline->y_grid.delta_inv; 0396 double ipartx, iparty, tx, ty; 0397 tx = modf (ux, &ipartx); int ix = (int) ipartx; 0398 ty = modf (uy, &iparty); int iy = (int) iparty; 0399 0400 double tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; 0401 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0402 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0403 complex_double* restrict coefs = spline->coefs; 0404 0405 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); 0406 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); 0407 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); 0408 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); 0409 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); 0410 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); 0411 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); 0412 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); 0413 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); 0414 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); 0415 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); 0416 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); 0417 0418 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); 0419 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); 0420 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); 0421 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); 0422 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); 0423 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); 0424 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); 0425 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); 0426 d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); 0427 d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); 0428 d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); 0429 d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); 0430 0431 intptr_t xs = spline->x_stride; 0432 intptr_t ys = spline->y_stride; 0433 0434 for (int n=0; n<spline->num_splines; n++) { 0435 vals[n] = 0.0; 0436 grads[2*n+0] = grads[2*n+1] = 0.0; 0437 for (int i=0; i<4; i++) 0438 hess[4*n+i] = 0.0; 0439 } 0440 0441 for (int i=0; i<4; i++) 0442 for (int j=0; j<4; j++){ 0443 double ab = a[i]*b[j]; 0444 double dab[2], d2ab[3]; 0445 dab[0] = da[i]* b[j]; 0446 dab[1] = a[i]*db[j]; 0447 d2ab[0] = d2a[i] * b[j]; 0448 d2ab[1] = da[i] * db[j]; 0449 d2ab[2] = a[i] * d2b[j]; 0450 0451 complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); 0452 for (int n=0; n<spline->num_splines; n++) { 0453 vals[n] += ab *coefs[n]; 0454 grads[2*n+0] += dab[0]*coefs[n]; 0455 grads[2*n+1] += dab[1]*coefs[n]; 0456 hess [4*n+0] += d2ab[0]*coefs[n]; 0457 hess [4*n+1] += d2ab[1]*coefs[n]; 0458 hess [4*n+3] += d2ab[2]*coefs[n]; 0459 } 0460 } 0461 0462 double dxInv = spline->x_grid.delta_inv; 0463 double dyInv = spline->y_grid.delta_inv; 0464 for (int n=0; n<spline->num_splines; n++) { 0465 grads[2*n+0] *= dxInv; 0466 grads[2*n+1] *= dyInv; 0467 hess[4*n+0] *= dxInv*dxInv; 0468 hess[4*n+1] *= dxInv*dyInv; 0469 hess[4*n+3] *= dyInv*dyInv; 0470 // Copy hessian elements into lower half of 3x3 matrix 0471 hess[4*n+2] = hess[4*n+1]; 0472 } 0473 } 0474 0475 0476 0477 /************************************************************/ 0478 /* 3D double-precision, complex evaulation functions */ 0479 /************************************************************/ 0480 void 0481 eval_multi_UBspline_3d_z (multi_UBspline_3d_z *spline, 0482 double x, double y, double z, 0483 complex_double* restrict vals) 0484 { 0485 x -= spline->x_grid.start; 0486 y -= spline->y_grid.start; 0487 z -= spline->z_grid.start; 0488 double ux = x*spline->x_grid.delta_inv; 0489 double uy = y*spline->y_grid.delta_inv; 0490 double uz = z*spline->z_grid.delta_inv; 0491 double ipartx, iparty, ipartz, tx, ty, tz; 0492 tx = modf (ux, &ipartx); int ix = (int) ipartx; 0493 ty = modf (uy, &iparty); int iy = (int) iparty; 0494 tz = modf (uz, &ipartz); int iz = (int) ipartz; 0495 0496 double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4]; 0497 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0498 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0499 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; 0500 complex_double* restrict coefs = spline->coefs; 0501 0502 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); 0503 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); 0504 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); 0505 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); 0506 0507 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); 0508 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); 0509 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); 0510 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); 0511 0512 c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); 0513 c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); 0514 c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); 0515 c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); 0516 0517 intptr_t xs = spline->x_stride; 0518 intptr_t ys = spline->y_stride; 0519 intptr_t zs = spline->z_stride; 0520 0521 for (int n=0; n<spline->num_splines; n++) 0522 vals[n] = 0.0; 0523 0524 for (int i=0; i<4; i++) 0525 for (int j=0; j<4; j++) 0526 for (int k=0; k<4; k++) { 0527 double prefactor = a[i]*b[j]*c[k]; 0528 complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); 0529 for (int n=0; n<spline->num_splines; n++) 0530 vals[n] += prefactor*coefs[n]; 0531 } 0532 } 0533 0534 0535 void 0536 eval_multi_UBspline_3d_z_vg (multi_UBspline_3d_z *spline, 0537 double x, double y, double z, 0538 complex_double* restrict vals, 0539 complex_double* restrict grads) 0540 { 0541 x -= spline->x_grid.start; 0542 y -= spline->y_grid.start; 0543 z -= spline->z_grid.start; 0544 double ux = x*spline->x_grid.delta_inv; 0545 double uy = y*spline->y_grid.delta_inv; 0546 double uz = z*spline->z_grid.delta_inv; 0547 double ipartx, iparty, ipartz, tx, ty, tz; 0548 tx = modf (ux, &ipartx); int ix = (int) ipartx; 0549 ty = modf (uy, &iparty); int iy = (int) iparty; 0550 tz = modf (uz, &ipartz); int iz = (int) ipartz; 0551 0552 double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], 0553 da[4], db[4], dc[4]; 0554 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0555 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0556 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; 0557 complex_double* restrict coefs = spline->coefs; 0558 0559 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); 0560 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); 0561 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); 0562 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); 0563 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); 0564 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); 0565 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); 0566 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); 0567 0568 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); 0569 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); 0570 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); 0571 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); 0572 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); 0573 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); 0574 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); 0575 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); 0576 0577 c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); 0578 c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); 0579 c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); 0580 c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); 0581 dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]); 0582 dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]); 0583 dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]); 0584 dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]); 0585 0586 intptr_t xs = spline->x_stride; 0587 intptr_t ys = spline->y_stride; 0588 intptr_t zs = spline->z_stride; 0589 0590 for (int n=0; n<spline->num_splines; n++) { 0591 vals[n] = 0.0; 0592 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; 0593 } 0594 0595 for (int i=0; i<4; i++) 0596 for (int j=0; j<4; j++) 0597 for (int k=0; k<4; k++) { 0598 double abc = a[i]*b[j]*c[k]; 0599 double dabc[3]; 0600 dabc[0] = da[i]* b[j]* c[k]; 0601 dabc[1] = a[i]*db[j]* c[k]; 0602 dabc[2] = a[i]* b[j]*dc[k]; 0603 0604 complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); 0605 for (int n=0; n<spline->num_splines; n++) { 0606 vals[n] += abc *coefs[n]; 0607 grads[3*n+0] += dabc[0]*coefs[n]; 0608 grads[3*n+1] += dabc[1]*coefs[n]; 0609 grads[3*n+2] += dabc[2]*coefs[n]; 0610 } 0611 } 0612 0613 double dxInv = spline->x_grid.delta_inv; 0614 double dyInv = spline->y_grid.delta_inv; 0615 double dzInv = spline->z_grid.delta_inv; 0616 for (int n=0; n<spline->num_splines; n++) { 0617 grads[3*n+0] *= dxInv; 0618 grads[3*n+1] *= dyInv; 0619 grads[3*n+2] *= dzInv; 0620 } 0621 } 0622 0623 0624 0625 void 0626 eval_multi_UBspline_3d_z_vgl (multi_UBspline_3d_z *spline, 0627 double x, double y, double z, 0628 complex_double* restrict vals, 0629 complex_double* restrict grads, 0630 complex_double* restrict lapl) 0631 { 0632 x -= spline->x_grid.start; 0633 y -= spline->y_grid.start; 0634 z -= spline->z_grid.start; 0635 double ux = x*spline->x_grid.delta_inv; 0636 double uy = y*spline->y_grid.delta_inv; 0637 double uz = z*spline->z_grid.delta_inv; 0638 double ipartx, iparty, ipartz, tx, ty, tz; 0639 tx = modf (ux, &ipartx); int ix = (int) ipartx; 0640 ty = modf (uy, &iparty); int iy = (int) iparty; 0641 tz = modf (uz, &ipartz); int iz = (int) ipartz; 0642 0643 double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], 0644 da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; 0645 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0646 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0647 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; 0648 complex_double* restrict coefs = spline->coefs; 0649 0650 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); 0651 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); 0652 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); 0653 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); 0654 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); 0655 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); 0656 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); 0657 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); 0658 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); 0659 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); 0660 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); 0661 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); 0662 0663 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); 0664 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); 0665 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); 0666 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); 0667 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); 0668 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); 0669 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); 0670 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); 0671 d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); 0672 d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); 0673 d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); 0674 d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); 0675 0676 c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); 0677 c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); 0678 c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); 0679 c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); 0680 dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]); 0681 dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]); 0682 dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]); 0683 dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]); 0684 d2c[0] = (d2Ad[ 0]*tpz[0] + d2Ad[ 1]*tpz[1] + d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]); 0685 d2c[1] = (d2Ad[ 4]*tpz[0] + d2Ad[ 5]*tpz[1] + d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]); 0686 d2c[2] = (d2Ad[ 8]*tpz[0] + d2Ad[ 9]*tpz[1] + d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]); 0687 d2c[3] = (d2Ad[12]*tpz[0] + d2Ad[13]*tpz[1] + d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]); 0688 0689 intptr_t xs = spline->x_stride; 0690 intptr_t ys = spline->y_stride; 0691 intptr_t zs = spline->z_stride; 0692 0693 //complex_double lapl3[3*spline->num_splines]; 0694 complex_double* restrict lapl3 = spline->lapl3; 0695 for (int n=0; n<spline->num_splines; n++) { 0696 vals[n] = 0.0; 0697 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; 0698 lapl3[3*n+0] = lapl3[3*n+1] = lapl3[3*n+2] = 0.0; 0699 } 0700 0701 for (int i=0; i<4; i++) 0702 for (int j=0; j<4; j++) 0703 for (int k=0; k<4; k++) { 0704 double abc = a[i]*b[j]*c[k]; 0705 double dabc[3], d2abc[3]; 0706 dabc[0] = da[i]* b[j]* c[k]; 0707 dabc[1] = a[i]*db[j]* c[k]; 0708 dabc[2] = a[i]* b[j]*dc[k]; 0709 d2abc[0] = d2a[i]* b[j]* c[k]; 0710 d2abc[1] = a[i]*d2b[j]* c[k]; 0711 d2abc[2] = a[i]* b[j]*d2c[k]; 0712 0713 complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); 0714 for (int n=0; n<spline->num_splines; n++) { 0715 vals[n] += abc *coefs[n]; 0716 grads[3*n+0] += dabc[0]*coefs[n]; 0717 grads[3*n+1] += dabc[1]*coefs[n]; 0718 grads[3*n+2] += dabc[2]*coefs[n]; 0719 lapl3[3*n+0] += d2abc[0]*coefs[n]; 0720 lapl3[3*n+1] += d2abc[1]*coefs[n]; 0721 lapl3[3*n+2] += d2abc[2]*coefs[n]; 0722 } 0723 } 0724 0725 double dxInv = spline->x_grid.delta_inv; 0726 double dyInv = spline->y_grid.delta_inv; 0727 double dzInv = spline->z_grid.delta_inv; 0728 for (int n=0; n<spline->num_splines; n++) { 0729 grads[3*n+0] *= dxInv; 0730 grads[3*n+1] *= dyInv; 0731 grads[3*n+2] *= dzInv; 0732 lapl3[3*n+0] *= dxInv*dxInv; 0733 lapl3[3*n+1] *= dyInv*dyInv; 0734 lapl3[3*n+2] *= dzInv*dzInv; 0735 lapl[n] = lapl3[3*n+0] + lapl3[3*n+1] + lapl3[3*n+2]; 0736 } 0737 } 0738 0739 0740 0741 void 0742 eval_multi_UBspline_3d_z_vgh (multi_UBspline_3d_z *spline, 0743 double x, double y, double z, 0744 complex_double* restrict vals, 0745 complex_double* restrict grads, 0746 complex_double* restrict hess) 0747 { 0748 x -= spline->x_grid.start; 0749 y -= spline->y_grid.start; 0750 z -= spline->z_grid.start; 0751 double ux = x*spline->x_grid.delta_inv; 0752 double uy = y*spline->y_grid.delta_inv; 0753 double uz = z*spline->z_grid.delta_inv; 0754 double ipartx, iparty, ipartz, tx, ty, tz; 0755 tx = modf (ux, &ipartx); int ix = (int) ipartx; 0756 ty = modf (uy, &iparty); int iy = (int) iparty; 0757 tz = modf (uz, &ipartz); int iz = (int) ipartz; 0758 0759 double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], 0760 da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; 0761 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0762 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0763 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; 0764 complex_double* restrict coefs = spline->coefs; 0765 0766 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); 0767 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); 0768 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); 0769 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); 0770 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); 0771 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); 0772 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); 0773 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); 0774 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); 0775 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); 0776 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); 0777 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); 0778 0779 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); 0780 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); 0781 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); 0782 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); 0783 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); 0784 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); 0785 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); 0786 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); 0787 d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); 0788 d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); 0789 d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); 0790 d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); 0791 0792 c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); 0793 c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); 0794 c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); 0795 c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); 0796 dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]); 0797 dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]); 0798 dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]); 0799 dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]); 0800 d2c[0] = (d2Ad[ 0]*tpz[0] + d2Ad[ 1]*tpz[1] + d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]); 0801 d2c[1] = (d2Ad[ 4]*tpz[0] + d2Ad[ 5]*tpz[1] + d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]); 0802 d2c[2] = (d2Ad[ 8]*tpz[0] + d2Ad[ 9]*tpz[1] + d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]); 0803 d2c[3] = (d2Ad[12]*tpz[0] + d2Ad[13]*tpz[1] + d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]); 0804 0805 intptr_t xs = spline->x_stride; 0806 intptr_t ys = spline->y_stride; 0807 intptr_t zs = spline->z_stride; 0808 0809 for (int n=0; n<spline->num_splines; n++) { 0810 vals[n] = 0.0; 0811 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; 0812 for (int i=0; i<9; i++) 0813 hess[9*n+i] = 0.0; 0814 } 0815 0816 for (int i=0; i<4; i++) 0817 for (int j=0; j<4; j++) 0818 for (int k=0; k<4; k++) { 0819 double abc = a[i]*b[j]*c[k]; 0820 double dabc[3], d2abc[6]; 0821 dabc[0] = da[i]* b[j]* c[k]; 0822 dabc[1] = a[i]*db[j]* c[k]; 0823 dabc[2] = a[i]* b[j]*dc[k]; 0824 d2abc[0] = d2a[i]* b[j]* c[k]; 0825 d2abc[1] = da[i]* db[j]* c[k]; 0826 d2abc[2] = da[i]* b[j]* dc[k]; 0827 d2abc[3] = a[i]*d2b[j]* c[k]; 0828 d2abc[4] = a[i]* db[j]* dc[k]; 0829 d2abc[5] = a[i]* b[j]*d2c[k]; 0830 0831 complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); 0832 for (int n=0; n<spline->num_splines; n++) { 0833 vals[n] += abc *coefs[n]; 0834 grads[3*n+0] += dabc[0]*coefs[n]; 0835 grads[3*n+1] += dabc[1]*coefs[n]; 0836 grads[3*n+2] += dabc[2]*coefs[n]; 0837 hess [9*n+0] += d2abc[0]*coefs[n]; 0838 hess [9*n+1] += d2abc[1]*coefs[n]; 0839 hess [9*n+2] += d2abc[2]*coefs[n]; 0840 hess [9*n+4] += d2abc[3]*coefs[n]; 0841 hess [9*n+5] += d2abc[4]*coefs[n]; 0842 hess [9*n+8] += d2abc[5]*coefs[n]; 0843 } 0844 } 0845 0846 double dxInv = spline->x_grid.delta_inv; 0847 double dyInv = spline->y_grid.delta_inv; 0848 double dzInv = spline->z_grid.delta_inv; 0849 for (int n=0; n<spline->num_splines; n++) { 0850 grads[3*n+0] *= dxInv; 0851 grads[3*n+1] *= dyInv; 0852 grads[3*n+2] *= dzInv; 0853 hess[9*n+0] *= dxInv*dxInv; 0854 hess[9*n+4] *= dyInv*dyInv; 0855 hess[9*n+8] *= dzInv*dzInv; 0856 hess[9*n+1] *= dxInv*dyInv; 0857 hess[9*n+2] *= dxInv*dzInv; 0858 hess[9*n+5] *= dyInv*dzInv; 0859 // Copy hessian elements into lower half of 3x3 matrix 0860 hess[9*n+3] = hess[9*n+1]; 0861 hess[9*n+6] = hess[9*n+2]; 0862 hess[9*n+7] = hess[9*n+5]; 0863 } 0864 } 0865 0866 0867 void 0868 eval_multi_UBspline_3d_z_vghgh (multi_UBspline_3d_z *spline, 0869 double x, double y, double z, 0870 complex_double* restrict vals, 0871 complex_double* restrict grads, 0872 complex_double* restrict hess, 0873 complex_double* restrict gradhess) 0874 { 0875 x -= spline->x_grid.start; 0876 y -= spline->y_grid.start; 0877 z -= spline->z_grid.start; 0878 double ux = x*spline->x_grid.delta_inv; 0879 double uy = y*spline->y_grid.delta_inv; 0880 double uz = z*spline->z_grid.delta_inv; 0881 double ipartx, iparty, ipartz, tx, ty, tz; 0882 tx = modf (ux, &ipartx); int ix = (int) ipartx; 0883 ty = modf (uy, &iparty); int iy = (int) iparty; 0884 tz = modf (uz, &ipartz); int iz = (int) ipartz; 0885 0886 double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], 0887 da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4], 0888 d3a[4], d3b[4], d3c[4]; 0889 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; 0890 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; 0891 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; 0892 complex_double* restrict coefs = spline->coefs; 0893 0894 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); 0895 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); 0896 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); 0897 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); 0898 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); 0899 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); 0900 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); 0901 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); 0902 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); 0903 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); 0904 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); 0905 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); 0906 d3a[0] = (/*d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] +*/ d3Ad[ 3]*tpx[3]); 0907 d3a[1] = (/*d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] +*/ d3Ad[ 7]*tpx[3]); 0908 d3a[2] = (/*d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] +*/ d3Ad[11]*tpx[3]); 0909 d3a[3] = (/*d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] +*/ d3Ad[15]*tpx[3]); 0910 0911 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); 0912 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); 0913 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); 0914 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); 0915 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); 0916 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); 0917 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); 0918 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); 0919 d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); 0920 d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); 0921 d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); 0922 d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); 0923 d3b[0] = (/*d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] +*/ d3Ad[ 3]*tpy[3]); 0924 d3b[1] = (/*d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] +*/ d3Ad[ 7]*tpy[3]); 0925 d3b[2] = (/*d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] +*/ d3Ad[11]*tpy[3]); 0926 d3b[3] = (/*d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] +*/ d3Ad[15]*tpy[3]); 0927 0928 c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); 0929 c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); 0930 c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); 0931 c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); 0932 dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]); 0933 dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]); 0934 dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]); 0935 dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]); 0936 d2c[0] = (d2Ad[ 0]*tpz[0] + d2Ad[ 1]*tpz[1] + d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]); 0937 d2c[1] = (d2Ad[ 4]*tpz[0] + d2Ad[ 5]*tpz[1] + d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]); 0938 d2c[2] = (d2Ad[ 8]*tpz[0] + d2Ad[ 9]*tpz[1] + d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]); 0939 d2c[3] = (d2Ad[12]*tpz[0] + d2Ad[13]*tpz[1] + d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]); 0940 d3c[0] = (/*d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] +*/ d3Ad[ 3]*tpz[3]); 0941 d3c[1] = (/*d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] +*/ d3Ad[ 7]*tpz[3]); 0942 d3c[2] = (/*d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] +*/ d3Ad[11]*tpz[3]); 0943 d3c[3] = (/*d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] +*/ d3Ad[15]*tpz[3]); 0944 0945 intptr_t xs = spline->x_stride; 0946 intptr_t ys = spline->y_stride; 0947 intptr_t zs = spline->z_stride; 0948 0949 for (int n=0; n<spline->num_splines; n++) { 0950 vals[n] = 0.0; 0951 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; 0952 for (int i=0; i<9; i++) 0953 hess[9*n+i] = 0.0; 0954 for (int i=0; i<27; i++) 0955 gradhess[27*n+i] = 0.0; 0956 } 0957 0958 for (int i=0; i<4; i++) 0959 for (int j=0; j<4; j++) 0960 for (int k=0; k<4; k++) { 0961 double abc = a[i]*b[j]*c[k]; 0962 double dabc[3], d2abc[6], d3abc[10]; 0963 dabc[0] = da[i]* b[j]* c[k]; 0964 dabc[1] = a[i]*db[j]* c[k]; 0965 dabc[2] = a[i]* b[j]*dc[k]; 0966 d2abc[0] = d2a[i]* b[j]* c[k]; 0967 d2abc[1] = da[i]* db[j]* c[k]; 0968 d2abc[2] = da[i]* b[j]* dc[k]; 0969 d2abc[3] = a[i]*d2b[j]* c[k]; 0970 d2abc[4] = a[i]* db[j]* dc[k]; 0971 d2abc[5] = a[i]* b[j]*d2c[k]; 0972 0973 d3abc[0] = d3a[i]* b[j]* c[k]; 0974 d3abc[1] = d2a[i]* db[j]* c[k]; 0975 d3abc[2] = d2a[i]* b[j]* dc[k]; 0976 d3abc[3] = da[i]*d2b[j]* c[k]; 0977 d3abc[4] = da[i]* db[j]* dc[k]; 0978 d3abc[5] = da[i]* b[j]*d2c[k]; 0979 d3abc[6] = a[i]*d3b[j]* c[k]; 0980 d3abc[7] = a[i]*d2b[j]* dc[k]; 0981 d3abc[8] = a[i]* db[j]*d2c[k]; 0982 d3abc[9] = a[i]* b[j]*d3c[k]; 0983 0984 complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); 0985 for (int n=0; n<spline->num_splines; n++) { 0986 vals[n] += abc *coefs[n]; 0987 grads[3*n+0] += dabc[0]*coefs[n]; 0988 grads[3*n+1] += dabc[1]*coefs[n]; 0989 grads[3*n+2] += dabc[2]*coefs[n]; 0990 hess [9*n+0] += d2abc[0]*coefs[n]; 0991 hess [9*n+1] += d2abc[1]*coefs[n]; 0992 hess [9*n+2] += d2abc[2]*coefs[n]; 0993 hess [9*n+4] += d2abc[3]*coefs[n]; 0994 hess [9*n+5] += d2abc[4]*coefs[n]; 0995 hess [9*n+8] += d2abc[5]*coefs[n]; 0996 0997 gradhess [27*n+0 ] += d3abc[0]*coefs[n]; 0998 gradhess [27*n+1 ] += d3abc[1]*coefs[n]; 0999 gradhess [27*n+2 ] += d3abc[2]*coefs[n]; 1000 gradhess [27*n+4 ] += d3abc[3]*coefs[n]; 1001 gradhess [27*n+5 ] += d3abc[4]*coefs[n]; 1002 gradhess [27*n+8 ] += d3abc[5]*coefs[n]; 1003 gradhess [27*n+13] += d3abc[6]*coefs[n]; 1004 gradhess [27*n+14] += d3abc[7]*coefs[n]; 1005 gradhess [27*n+17] += d3abc[8]*coefs[n]; 1006 gradhess [27*n+26] += d3abc[9]*coefs[n]; 1007 } 1008 } 1009 1010 double dxInv = spline->x_grid.delta_inv; 1011 double dyInv = spline->y_grid.delta_inv; 1012 double dzInv = spline->z_grid.delta_inv; 1013 for (int n=0; n<spline->num_splines; n++) { 1014 grads[3*n+0] *= dxInv; 1015 grads[3*n+1] *= dyInv; 1016 grads[3*n+2] *= dzInv; 1017 hess [9*n+0] *= dxInv*dxInv; 1018 hess [9*n+4] *= dyInv*dyInv; 1019 hess [9*n+8] *= dzInv*dzInv; 1020 hess [9*n+1] *= dxInv*dyInv; 1021 hess [9*n+2] *= dxInv*dzInv; 1022 hess [9*n+5] *= dyInv*dzInv; 1023 // Copy hessian elements into lower half of 3x3 matrix 1024 hess [9*n+3] = hess[9*n+1]; 1025 hess [9*n+6] = hess[9*n+2]; 1026 hess [9*n+7] = hess[9*n+5]; 1027 1028 gradhess [27*n+0 ] *= dxInv*dxInv*dxInv; 1029 gradhess [27*n+1 ] *= dxInv*dxInv*dyInv; 1030 gradhess [27*n+2 ] *= dxInv*dxInv*dzInv; 1031 gradhess [27*n+4 ] *= dxInv*dyInv*dyInv; 1032 gradhess [27*n+5 ] *= dxInv*dyInv*dzInv; 1033 gradhess [27*n+8 ] *= dxInv*dzInv*dzInv; 1034 gradhess [27*n+13] *= dyInv*dyInv*dyInv; 1035 gradhess [27*n+14] *= dyInv*dyInv*dzInv; 1036 gradhess [27*n+17] *= dyInv*dzInv*dzInv; 1037 gradhess [27*n+26] *= dzInv*dzInv*dzInv; 1038 1039 // Copy gradhess elements into rest of tensor 1040 gradhess [27*n+9 ] = gradhess [27*n+3 ] = gradhess [27*n+1 ]; 1041 gradhess [27*n+18 ] = gradhess [27*n+6 ] = gradhess [27*n+2 ]; 1042 gradhess [27*n+22 ] = gradhess [27*n+16 ] = gradhess [27*n+14]; 1043 gradhess [27*n+12 ] = gradhess [27*n+10 ] = gradhess [27*n+4 ]; 1044 gradhess [27*n+24 ] = gradhess [27*n+20 ] = gradhess [27*n+8 ]; 1045 gradhess [27*n+25 ] = gradhess [27*n+23 ] = gradhess [27*n+17]; 1046 gradhess [27*n+21 ] = gradhess [27*n+19 ] = gradhess [27*n+15] = gradhess [27*n+11 ] = gradhess [27*n+7 ] = gradhess [27*n+5]; 1047 1048 } 1049 } 1050 1051 #endif