File indexing completed on 2024-12-22 04:09:50

0001 /////////////////////////////////////////////////////////////////////////////
0002 //  einspline:  a library for creating and evaluating B-splines            //
0003 //  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
0004 //                                                                         //
0005 //  This program is free software; you can redistribute it and/or modify   //
0006 //  it under the terms of the GNU General Public License as published by   //
0007 //  the Free Software Foundation; either version 2 of the License, or      //
0008 //  (at your option) any later version.                                    //
0009 //                                                                         //
0010 //  This program is distributed in the hope that it will be useful,        //
0011 //  but WITHOUT ANY WARRANTY; without even the implied warranty of         //
0012 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
0013 //  GNU General Public License for more details.                           //
0014 //                                                                         //
0015 //  You should have received a copy of the GNU General Public License      //
0016 //  along with this program; if not, write to the Free Software            //
0017 //  Foundation, Inc., 51 Franklin Street, Fifth Floor,                     //
0018 //  Boston, MA  02110-1301  USA                                            //
0019 /////////////////////////////////////////////////////////////////////////////
0020 
0021 #ifndef MULTI_BSPLINE_EVAL_STD_S_H
0022 #define MULTI_BSPLINE_EVAL_STD_S_H
0023 
0024 #include <math.h>
0025 #include <stdio.h>
0026 #include "multi_bspline_structs.h"
0027 
0028 extern const float* restrict   Af;
0029 extern const float* restrict  dAf;
0030 extern const float* restrict d2Af;
0031 extern const float* restrict d3Af;
0032 
0033 /************************************************************/
0034 /* 1D double-precision, real evaulation functions        */
0035 /************************************************************/
0036 inline void
0037 eval_multi_UBspline_1d_s (multi_UBspline_1d_s *spline,
0038               double x,
0039               float* restrict vals)
0040 {
0041   x -= spline->x_grid.start;
0042   float ux = x*spline->x_grid.delta_inv;
0043   float ipartx, tx;
0044   tx = modff (ux, &ipartx);  int ix = (int) ipartx;
0045   
0046   float tpx[4], a[4];
0047   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
0048   float* restrict coefs = spline->coefs;
0049   
0050   a[0]  = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
0051   a[1]  = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
0052   a[2]  = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
0053   a[3]  = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
0054 
0055   int xs = spline->x_stride;
0056 
0057   for (int n=0; n<spline->num_splines; n++) 
0058     vals[n]  = 0.0;
0059 
0060   for (int i=0; i<4; i++) {
0061     float* restrict coefs = spline->coefs + ((ix+i)*xs);
0062     for (int n=0; n<spline->num_splines; n++) 
0063       vals[n]  +=   a[i] * coefs[n];
0064   }
0065 }
0066 
0067 
0068 
0069 inline void
0070 eval_multi_UBspline_1d_s_vg (multi_UBspline_1d_s *spline,
0071                  double x,
0072                  float* restrict vals,
0073                  float* restrict grads)
0074 {
0075   x -= spline->x_grid.start;
0076   float ux = x*spline->x_grid.delta_inv;
0077   float ipartx, tx;
0078   tx = modff (ux, &ipartx);  int ix = (int) ipartx;
0079   
0080   float tpx[4], a[4], da[4];
0081   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
0082   float* restrict coefs = spline->coefs;
0083 
0084   a[0]  = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
0085   a[1]  = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
0086   a[2]  = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
0087   a[3]  = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
0088   da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
0089   da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
0090   da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
0091   da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
0092 
0093   int xs = spline->x_stride;
0094 
0095   for (int n=0; n<spline->num_splines; n++) {
0096     vals[n]  = 0.0;
0097     grads[n] = 0.0;
0098   }
0099 
0100   for (int i=0; i<4; i++) { 
0101     float* restrict coefs = spline->coefs + ((ix+i)*xs);
0102     for (int n=0; n<spline->num_splines; n++) {
0103       vals[n]  +=   a[i] * coefs[n];
0104       grads[n] +=  da[i] * coefs[n];
0105     }
0106   }
0107   
0108   float dxInv = spline->x_grid.delta_inv;
0109   for (int n=0; n<spline->num_splines; n++) 
0110     grads[n] *= dxInv;
0111 }
0112 
0113 
0114 inline void
0115 eval_multi_UBspline_1d_s_vgl (multi_UBspline_1d_s *spline,
0116                   double x,
0117                   float* restrict vals,
0118                   float* restrict grads,
0119                   float* restrict lapl)   
0120 {
0121   x -= spline->x_grid.start;
0122   float ux = x*spline->x_grid.delta_inv;
0123   float ipartx, tx;
0124   tx = modff (ux, &ipartx);  int ix = (int) ipartx;
0125   
0126   float tpx[4], a[4], da[4], d2a[4];
0127   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
0128   float* restrict coefs = spline->coefs;
0129 
0130   a[0]  = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
0131   a[1]  = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
0132   a[2]  = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
0133   a[3]  = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
0134   da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
0135   da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
0136   da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
0137   da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
0138   d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]);
0139   d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]);
0140   d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]);
0141   d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]);
0142 
0143   int xs = spline->x_stride;
0144 
0145   for (int n=0; n<spline->num_splines; n++) {
0146     vals[n]  = 0.0;
0147     grads[n] = 0.0;
0148     lapl[n]  = 0.0;
0149   }
0150 
0151   for (int i=0; i<4; i++) {      
0152     float* restrict coefs = spline->coefs + ((ix+i)*xs);
0153     for (int n=0; n<spline->num_splines; n++) {
0154       vals[n]  +=   a[i] * coefs[n];
0155       grads[n] +=  da[i] * coefs[n];
0156       lapl[n]  += d2a[i] * coefs[n];
0157     }
0158   }
0159 
0160   float dxInv = spline->x_grid.delta_inv;
0161   for (int n=0; n<spline->num_splines; n++) {
0162     grads[n] *= dxInv;
0163     lapl [n] *= dxInv*dxInv;
0164   }
0165 }
0166 
0167 inline void
0168 eval_multi_UBspline_1d_s_vgh (multi_UBspline_1d_s *spline,
0169                   double x,
0170                   float* restrict vals,
0171                   float* restrict grads,
0172                   float* restrict hess)
0173 {
0174   eval_multi_UBspline_1d_s_vgl (spline, x, vals, grads, hess);
0175 }
0176 
0177 
0178 /************************************************************/
0179 /* 2D double-precision, real evaulation functions        */
0180 /************************************************************/
0181 inline void
0182 eval_multi_UBspline_2d_s (multi_UBspline_2d_s *spline,
0183               double x, double y,
0184               float* restrict vals)
0185 {
0186   x -= spline->x_grid.start;
0187   y -= spline->y_grid.start;
0188   float ux = x*spline->x_grid.delta_inv;
0189   float uy = y*spline->y_grid.delta_inv;
0190   float ipartx, iparty, tx, ty;
0191   tx = modff (ux, &ipartx);  int ix = (int) ipartx;
0192   ty = modff (uy, &iparty);  int iy = (int) iparty;
0193   
0194   float tpx[4], tpy[4], a[4], b[4];
0195   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
0196   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
0197   float* restrict coefs = spline->coefs;
0198 
0199   a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
0200   a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
0201   a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
0202   a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
0203 
0204   b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
0205   b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
0206   b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
0207   b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
0208 
0209   int xs = spline->x_stride;
0210   int ys = spline->y_stride;
0211 
0212   for (int n=0; n<spline->num_splines; n++)
0213     vals[n] = 0.0;
0214 
0215   for (int i=0; i<4; i++)
0216     for (int j=0; j<4; j++) {
0217       float prefactor = a[i]*b[j];
0218       float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
0219       for (int n=0; n<spline->num_splines; n++) 
0220     vals[n] += prefactor*coefs[n];
0221     }
0222 }
0223 
0224 
0225 inline void
0226 eval_multi_UBspline_2d_s_vg (multi_UBspline_2d_s *spline,
0227                  double x, double y,
0228                  float* restrict vals,
0229                  float* restrict grads)
0230 {
0231   x -= spline->x_grid.start;
0232   y -= spline->y_grid.start;
0233   float ux = x*spline->x_grid.delta_inv;
0234   float uy = y*spline->y_grid.delta_inv;
0235   float ipartx, iparty, tx, ty;
0236   tx = modff (ux, &ipartx);  int ix = (int) ipartx;
0237   ty = modff (uy, &iparty);  int iy = (int) iparty;
0238   
0239   float tpx[4], tpy[4], a[4], b[4], da[4], db[4];
0240   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
0241   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
0242   float* restrict coefs = spline->coefs;
0243 
0244   a[0]  = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
0245   a[1]  = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
0246   a[2]  = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
0247   a[3]  = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
0248   da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
0249   da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
0250   da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
0251   da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
0252 
0253   b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
0254   b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
0255   b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
0256   b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
0257   db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]);
0258   db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]);
0259   db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]);
0260   db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]);
0261 
0262   int xs = spline->x_stride;
0263   int ys = spline->y_stride;
0264 
0265   for (int n=0; n<spline->num_splines; n++) {
0266     vals[n] = 0.0;
0267     grads[2*n+0] = grads[2*n+1] = grads[2*n+2] = 0.0;
0268   }
0269 
0270   for (int i=0; i<4; i++)
0271     for (int j=0; j<4; j++) {
0272       float ab = a[i]*b[j];
0273       float dab[2];
0274       dab[0] = da[i]* b[j];
0275       dab[1] =  a[i]*db[j];
0276       
0277       float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
0278       for (int n=0; n<spline->num_splines; n++) {
0279     vals [n]     +=   ab   *coefs[n];
0280     grads[2*n+0] +=  dab[0]*coefs[n];
0281     grads[2*n+1] +=  dab[1]*coefs[n];
0282       }
0283     }
0284 
0285   float dxInv = spline->x_grid.delta_inv;
0286   float dyInv = spline->y_grid.delta_inv;
0287   for (int n=0; n<spline->num_splines; n++) {
0288     grads[2*n+0] *= dxInv;
0289     grads[2*n+1] *= dyInv;
0290   }
0291 }
0292 
0293 inline void
0294 eval_multi_UBspline_2d_s_vgl (multi_UBspline_2d_s *spline,
0295                   double x, double y,
0296                   float* restrict vals,
0297                   float* restrict grads,
0298                   float* restrict lapl)   
0299 {
0300   x -= spline->x_grid.start;
0301   y -= spline->y_grid.start;
0302   float ux = x*spline->x_grid.delta_inv;
0303   float uy = y*spline->y_grid.delta_inv;
0304   float ipartx, iparty, tx, ty;
0305   tx = modff (ux, &ipartx);  int ix = (int) ipartx;
0306   ty = modff (uy, &iparty);  int iy = (int) iparty;
0307   
0308   float tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4];
0309   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
0310   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
0311   float* restrict coefs = spline->coefs;
0312 
0313   a[0]  = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
0314   a[1]  = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
0315   a[2]  = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
0316   a[3]  = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
0317   da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
0318   da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
0319   da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
0320   da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
0321   d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]);
0322   d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]);
0323   d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]);
0324   d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]);
0325 
0326   b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
0327   b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
0328   b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
0329   b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
0330   db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]);
0331   db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]);
0332   db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]);
0333   db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]);
0334   d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]);
0335   d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]);
0336   d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]);
0337   d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]);
0338 
0339   int xs = spline->x_stride;
0340   int ys = spline->y_stride;
0341 
0342   float lapl2[2*spline->num_splines];
0343   for (int n=0; n<spline->num_splines; n++) {
0344     vals[n] = 0.0;
0345     grads[2*n+0] = grads[2*n+1] = 0.0;
0346     lapl2[2*n+0] = lapl2[2*n+1] = 0.0;
0347   }
0348 
0349   for (int i=0; i<4; i++)
0350     for (int j=0; j<4; j++) {
0351       float ab = a[i]*b[j];
0352       float dab[2], d2ab[2];
0353       dab[0] = da[i]* b[j];
0354       dab[1] =  a[i]*db[j];
0355       d2ab[0] = d2a[i]*  b[j];
0356       d2ab[1] =   a[i]*d2b[j];
0357       
0358       float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
0359       for (int n=0; n<spline->num_splines; n++) {
0360     vals[n]      +=   ab   *coefs[n];
0361     grads[2*n+0] +=  dab[0]*coefs[n];
0362     grads[2*n+1] +=  dab[1]*coefs[n];
0363     lapl2[2*n+0] += d2ab[0]*coefs[n];
0364     lapl2[2*n+1] += d2ab[1]*coefs[n];
0365       }
0366     }
0367 
0368   float dxInv = spline->x_grid.delta_inv;
0369   float dyInv = spline->y_grid.delta_inv;
0370   for (int n=0; n<spline->num_splines; n++) {
0371     grads[2*n+0] *= dxInv;
0372     grads[2*n+1] *= dyInv;
0373     lapl2[2*n+0] *= dxInv*dxInv;
0374     lapl2[2*n+1] *= dyInv*dyInv;
0375     lapl[n] = lapl2[2*n+0] + lapl2[2*n+1];
0376   }
0377 }
0378 
0379 
0380 
0381 
0382 inline void
0383 eval_multi_UBspline_2d_s_vgh (multi_UBspline_2d_s *spline,
0384                   double x, double y,
0385                   float* restrict vals,
0386                   float* restrict grads,
0387                   float* restrict hess)   
0388 {
0389   x -= spline->x_grid.start;
0390   y -= spline->y_grid.start;
0391   float ux = x*spline->x_grid.delta_inv;
0392   float uy = y*spline->y_grid.delta_inv;
0393   float ipartx, iparty, tx, ty;
0394   tx = modff (ux, &ipartx);  int ix = (int) ipartx;
0395   ty = modff (uy, &iparty);  int iy = (int) iparty;
0396   
0397   float tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4];
0398   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
0399   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
0400   float* restrict coefs = spline->coefs;
0401 
0402   a[0]  = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
0403   a[1]  = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
0404   a[2]  = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
0405   a[3]  = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
0406   da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
0407   da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
0408   da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
0409   da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
0410   d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]);
0411   d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]);
0412   d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]);
0413   d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]);
0414 
0415   b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
0416   b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
0417   b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
0418   b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
0419   db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]);
0420   db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]);
0421   db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]);
0422   db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]);
0423   d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]);
0424   d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]);
0425   d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]);
0426   d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]);
0427 
0428   int xs = spline->x_stride;
0429   int ys = spline->y_stride;
0430 
0431   for (int n=0; n<spline->num_splines; n++) {
0432     vals[n] = 0.0;
0433     grads[2*n+0] = grads[2*n+1] = 0.0;
0434     for (int i=0; i<4; i++)
0435       hess[4*n+i] = 0.0;
0436   }
0437 
0438   for (int i=0; i<4; i++)
0439     for (int j=0; j<4; j++){
0440       float ab = a[i]*b[j];
0441       float dab[2], d2ab[3];
0442       dab[0] = da[i]* b[j];
0443       dab[1] =  a[i]*db[j];
0444       d2ab[0] = d2a[i] *   b[j];
0445       d2ab[1] =  da[i] *  db[j];
0446       d2ab[2] =   a[i] * d2b[j];
0447 
0448       float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
0449       for (int n=0; n<spline->num_splines; n++) {
0450     vals[n]      +=   ab   *coefs[n];
0451     grads[2*n+0] +=  dab[0]*coefs[n];
0452     grads[2*n+1] +=  dab[1]*coefs[n];
0453     hess [4*n+0] += d2ab[0]*coefs[n];
0454     hess [4*n+1] += d2ab[1]*coefs[n];
0455     hess [4*n+3] += d2ab[2]*coefs[n];
0456       }
0457     }
0458 
0459   float dxInv = spline->x_grid.delta_inv;
0460   float dyInv = spline->y_grid.delta_inv;
0461   for (int n=0; n<spline->num_splines; n++) {
0462     grads[2*n+0] *= dxInv;
0463     grads[2*n+1] *= dyInv;
0464     hess[4*n+0] *= dxInv*dxInv;
0465     hess[4*n+1] *= dxInv*dyInv;
0466     hess[4*n+3] *= dyInv*dyInv;
0467     // Copy hessian elements into lower half of 3x3 matrix
0468     hess[4*n+2] = hess[4*n+1];
0469   }
0470 }
0471 
0472 
0473 
0474 
0475 /************************************************************/
0476 /* 3D double-precision, real evaulation functions        */
0477 /************************************************************/
0478 inline void
0479 eval_multi_UBspline_3d_s (multi_UBspline_3d_s *spline,
0480               double x, double y, double z,
0481               float* restrict vals)
0482 {
0483   x -= spline->x_grid.start;
0484   y -= spline->y_grid.start;
0485   z -= spline->z_grid.start;
0486   float ux = x*spline->x_grid.delta_inv;
0487   float uy = y*spline->y_grid.delta_inv;
0488   float uz = z*spline->z_grid.delta_inv;
0489   float ipartx, iparty, ipartz, tx, ty, tz;
0490   tx = modff (ux, &ipartx);  int ix = (int) ipartx;
0491   ty = modff (uy, &iparty);  int iy = (int) iparty;
0492   tz = modff (uz, &ipartz);  int iz = (int) ipartz;
0493   
0494   float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4];
0495   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
0496   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
0497   tpz[0] = tz*tz*tz;  tpz[1] = tz*tz;  tpz[2] = tz;  tpz[3] = 1.0;
0498   float* restrict coefs = spline->coefs;
0499 
0500   a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
0501   a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
0502   a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
0503   a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
0504 
0505   b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
0506   b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
0507   b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
0508   b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
0509 
0510   c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]);
0511   c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]);
0512   c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]);
0513   c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]);
0514 
0515   int xs = spline->x_stride;
0516   int ys = spline->y_stride;
0517   int zs = spline->z_stride;
0518 
0519   for (int n=0; n<spline->num_splines; n++)
0520     vals[n] = 0.0;
0521 
0522   for (int i=0; i<4; i++)
0523     for (int j=0; j<4; j++) 
0524       for (int k=0; k<4; k++) {
0525     float abc = a[i]*b[j]*c[k];
0526     float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
0527     for (int n=0; n<spline->num_splines; n++) 
0528       vals[n] += abc*coefs[n];
0529       }
0530 }
0531 
0532 
0533 inline void
0534 eval_multi_UBspline_3d_s_vg (multi_UBspline_3d_s *spline,
0535                  double x, double y, double z,
0536                  float* restrict vals,
0537                  float* restrict grads)
0538 {
0539   x -= spline->x_grid.start;
0540   y -= spline->y_grid.start;
0541   z -= spline->z_grid.start;
0542   float ux = x*spline->x_grid.delta_inv;
0543   float uy = y*spline->y_grid.delta_inv;
0544   float uz = z*spline->z_grid.delta_inv;
0545   float ipartx, iparty, ipartz, tx, ty, tz;
0546   tx = modff (ux, &ipartx);  int ix = (int) ipartx;
0547   ty = modff (uy, &iparty);  int iy = (int) iparty;
0548   tz = modff (uz, &ipartz);  int iz = (int) ipartz;
0549   
0550   float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], 
0551     da[4], db[4], dc[4];
0552   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
0553   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
0554   tpz[0] = tz*tz*tz;  tpz[1] = tz*tz;  tpz[2] = tz;  tpz[3] = 1.0;
0555   float* restrict coefs = spline->coefs;
0556 
0557   a[0]  = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
0558   a[1]  = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
0559   a[2]  = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
0560   a[3]  = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
0561   da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
0562   da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
0563   da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
0564   da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
0565 
0566   b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
0567   b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
0568   b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
0569   b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
0570   db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]);
0571   db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]);
0572   db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]);
0573   db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]);
0574 
0575   c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]);
0576   c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]);
0577   c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]);
0578   c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]);
0579   dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]);
0580   dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]);
0581   dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]);
0582   dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]);
0583 
0584   int xs = spline->x_stride;
0585   int ys = spline->y_stride;
0586   int zs = spline->z_stride;
0587 
0588   for (int n=0; n<spline->num_splines; n++) {
0589     vals[n] = 0.0;
0590     grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
0591   }
0592 
0593   for (int i=0; i<4; i++)
0594     for (int j=0; j<4; j++) 
0595       for (int k=0; k<4; k++) {
0596     float abc = a[i]*b[j]*c[k];
0597     float dabc[3];
0598     dabc[0] = da[i]* b[j]* c[k];
0599     dabc[1] =  a[i]*db[j]* c[k];
0600     dabc[2] =  a[i]* b[j]*dc[k];
0601 
0602     float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
0603     for (int n=0; n<spline->num_splines; n++) {
0604       vals[n]      +=   abc   *coefs[n];
0605       grads[3*n+0] +=  dabc[0]*coefs[n];
0606       grads[3*n+1] +=  dabc[1]*coefs[n];
0607       grads[3*n+2] +=  dabc[2]*coefs[n];
0608     }
0609       }
0610 
0611   float dxInv = spline->x_grid.delta_inv;
0612   float dyInv = spline->y_grid.delta_inv;
0613   float dzInv = spline->z_grid.delta_inv; 
0614   for (int n=0; n<spline->num_splines; n++) {
0615     grads[3*n+0] *= dxInv;
0616     grads[3*n+1] *= dyInv;
0617     grads[3*n+2] *= dzInv;
0618   }
0619 }
0620 
0621 
0622 
0623 inline void
0624 eval_multi_UBspline_3d_s_vgl (multi_UBspline_3d_s *spline,
0625                   double x, double y, double z,
0626                   float* restrict vals,
0627                   float* restrict grads,
0628                   float* restrict lapl)   
0629 {
0630   x -= spline->x_grid.start;
0631   y -= spline->y_grid.start;
0632   z -= spline->z_grid.start;
0633   float ux = x*spline->x_grid.delta_inv;
0634   float uy = y*spline->y_grid.delta_inv;
0635   float uz = z*spline->z_grid.delta_inv;
0636   float ipartx, iparty, ipartz, tx, ty, tz;
0637   tx = modff (ux, &ipartx);  int ix = (int) ipartx;
0638   ty = modff (uy, &iparty);  int iy = (int) iparty;
0639   tz = modff (uz, &ipartz);  int iz = (int) ipartz;
0640   
0641   float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], 
0642     da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
0643   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
0644   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
0645   tpz[0] = tz*tz*tz;  tpz[1] = tz*tz;  tpz[2] = tz;  tpz[3] = 1.0;
0646   float* restrict coefs = spline->coefs;
0647 
0648   a[0]  = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
0649   a[1]  = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
0650   a[2]  = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
0651   a[3]  = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
0652   da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
0653   da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
0654   da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
0655   da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
0656   d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]);
0657   d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]);
0658   d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]);
0659   d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]);
0660 
0661   b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
0662   b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
0663   b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
0664   b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
0665   db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]);
0666   db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]);
0667   db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]);
0668   db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]);
0669   d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]);
0670   d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]);
0671   d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]);
0672   d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]);
0673 
0674   c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]);
0675   c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]);
0676   c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]);
0677   c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]);
0678   dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]);
0679   dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]);
0680   dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]);
0681   dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]);
0682   d2c[0] = (d2Af[ 0]*tpz[0] + d2Af[ 1]*tpz[1] + d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]);
0683   d2c[1] = (d2Af[ 4]*tpz[0] + d2Af[ 5]*tpz[1] + d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]);
0684   d2c[2] = (d2Af[ 8]*tpz[0] + d2Af[ 9]*tpz[1] + d2Af[10]*tpz[2] + d2Af[11]*tpz[3]);
0685   d2c[3] = (d2Af[12]*tpz[0] + d2Af[13]*tpz[1] + d2Af[14]*tpz[2] + d2Af[15]*tpz[3]);
0686 
0687   int xs = spline->x_stride;
0688   int ys = spline->y_stride;
0689   int zs = spline->z_stride;
0690 
0691   float lapl3[3*spline->num_splines];
0692   for (int n=0; n<spline->num_splines; n++) {
0693     vals[n] = 0.0;
0694     grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
0695     lapl3[3*n+0] = lapl3[3*n+1] = lapl3[3*n+2] = 0.0;
0696   }
0697 
0698   for (int i=0; i<4; i++)
0699     for (int j=0; j<4; j++) 
0700       for (int k=0; k<4; k++) {
0701     float abc = a[i]*b[j]*c[k];
0702     float dabc[3], d2abc[3];
0703     dabc[0] = da[i]* b[j]* c[k];
0704     dabc[1] =  a[i]*db[j]* c[k];
0705     dabc[2] =  a[i]* b[j]*dc[k];
0706     d2abc[0] = d2a[i]*  b[j]*  c[k];
0707     d2abc[1] =   a[i]*d2b[j]*  c[k];
0708     d2abc[2] =   a[i]*  b[j]*d2c[k];
0709 
0710     float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
0711     for (int n=0; n<spline->num_splines; n++) {
0712       vals[n]      +=   abc   *coefs[n];
0713       grads[3*n+0] +=  dabc[0]*coefs[n];
0714       grads[3*n+1] +=  dabc[1]*coefs[n];
0715       grads[3*n+2] +=  dabc[2]*coefs[n];
0716       lapl3[3*n+0] += d2abc[0]*coefs[n];
0717       lapl3[3*n+1] += d2abc[1]*coefs[n];
0718       lapl3[3*n+2] += d2abc[2]*coefs[n];
0719     }
0720       }
0721 
0722   float dxInv = spline->x_grid.delta_inv;
0723   float dyInv = spline->y_grid.delta_inv;
0724   float dzInv = spline->z_grid.delta_inv; 
0725   for (int n=0; n<spline->num_splines; n++) {
0726     grads[3*n+0] *= dxInv;
0727     grads[3*n+1] *= dyInv;
0728     grads[3*n+2] *= dzInv;
0729     lapl3[3*n+0] *= dxInv*dxInv;
0730     lapl3[3*n+1] *= dyInv*dyInv;
0731     lapl3[3*n+2] *= dzInv*dzInv;
0732     lapl[n] = lapl3[3*n+0] + lapl3[3*n+1] + lapl3[3*n+2];
0733   }
0734 }
0735 
0736 
0737 
0738 inline void
0739 eval_multi_UBspline_3d_s_vgh (multi_UBspline_3d_s *spline,
0740                   double x, double y, double z,
0741                   float* restrict vals,
0742                   float* restrict grads,
0743                   float* restrict hess)   
0744 {
0745   x -= spline->x_grid.start;
0746   y -= spline->y_grid.start;
0747   z -= spline->z_grid.start;
0748   float ux = x*spline->x_grid.delta_inv;
0749   float uy = y*spline->y_grid.delta_inv;
0750   float uz = z*spline->z_grid.delta_inv;
0751   float ipartx, iparty, ipartz, tx, ty, tz;
0752   tx = modff (ux, &ipartx);  int ix = (int) ipartx;
0753   ty = modff (uy, &iparty);  int iy = (int) iparty;
0754   tz = modff (uz, &ipartz);  int iz = (int) ipartz;
0755   
0756   float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], 
0757     da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
0758   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
0759   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
0760   tpz[0] = tz*tz*tz;  tpz[1] = tz*tz;  tpz[2] = tz;  tpz[3] = 1.0;
0761   float* restrict coefs = spline->coefs;
0762 
0763   a[0]  = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
0764   a[1]  = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
0765   a[2]  = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
0766   a[3]  = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
0767   da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
0768   da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
0769   da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
0770   da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
0771   d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]);
0772   d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]);
0773   d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]);
0774   d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]);
0775 
0776   b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
0777   b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
0778   b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
0779   b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
0780   db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]);
0781   db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]);
0782   db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]);
0783   db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]);
0784   d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]);
0785   d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]);
0786   d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]);
0787   d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]);
0788 
0789   c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]);
0790   c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]);
0791   c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]);
0792   c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]);
0793   dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]);
0794   dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]);
0795   dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]);
0796   dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]);
0797   d2c[0] = (d2Af[ 0]*tpz[0] + d2Af[ 1]*tpz[1] + d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]);
0798   d2c[1] = (d2Af[ 4]*tpz[0] + d2Af[ 5]*tpz[1] + d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]);
0799   d2c[2] = (d2Af[ 8]*tpz[0] + d2Af[ 9]*tpz[1] + d2Af[10]*tpz[2] + d2Af[11]*tpz[3]);
0800   d2c[3] = (d2Af[12]*tpz[0] + d2Af[13]*tpz[1] + d2Af[14]*tpz[2] + d2Af[15]*tpz[3]);
0801 
0802   int xs = spline->x_stride;
0803   int ys = spline->y_stride;
0804   int zs = spline->z_stride;
0805 
0806   for (int n=0; n<spline->num_splines; n++) {
0807     vals[n] = 0.0;
0808     grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
0809     for (int i=0; i<9; i++)
0810       hess[9*n+i] = 0.0;
0811   }
0812 
0813   for (int i=0; i<4; i++)
0814     for (int j=0; j<4; j++) 
0815       for (int k=0; k<4; k++) {
0816     float abc = a[i]*b[j]*c[k];
0817     float dabc[3], d2abc[6];
0818     dabc[0] = da[i]* b[j]* c[k];
0819     dabc[1] =  a[i]*db[j]* c[k];
0820     dabc[2] =  a[i]* b[j]*dc[k];
0821     d2abc[0] = d2a[i]*  b[j]*  c[k];
0822     d2abc[1] =  da[i]* db[j]*  c[k];
0823     d2abc[2] =  da[i]*  b[j]* dc[k];
0824     d2abc[3] =   a[i]*d2b[j]*  c[k];
0825     d2abc[4] =   a[i]* db[j]* dc[k];
0826     d2abc[5] =   a[i]*  b[j]*d2c[k];
0827 
0828     float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
0829     for (int n=0; n<spline->num_splines; n++) {
0830       vals[n]      +=   abc   *coefs[n];
0831       grads[3*n+0] +=  dabc[0]*coefs[n];
0832       grads[3*n+1] +=  dabc[1]*coefs[n];
0833       grads[3*n+2] +=  dabc[2]*coefs[n];
0834       hess [9*n+0] += d2abc[0]*coefs[n];
0835       hess [9*n+1] += d2abc[1]*coefs[n];
0836       hess [9*n+2] += d2abc[2]*coefs[n];
0837       hess [9*n+4] += d2abc[3]*coefs[n];
0838       hess [9*n+5] += d2abc[4]*coefs[n];
0839       hess [9*n+8] += d2abc[5]*coefs[n];
0840     }
0841       }
0842 
0843   float dxInv = spline->x_grid.delta_inv;
0844   float dyInv = spline->y_grid.delta_inv;
0845   float dzInv = spline->z_grid.delta_inv; 
0846   for (int n=0; n<spline->num_splines; n++) {
0847     grads[3*n+0] *= dxInv;
0848     grads[3*n+1] *= dyInv;
0849     grads[3*n+2] *= dzInv;
0850     hess [9*n+0] *= dxInv*dxInv;
0851     hess [9*n+4] *= dyInv*dyInv;
0852     hess [9*n+8] *= dzInv*dzInv;
0853     hess [9*n+1] *= dxInv*dyInv;
0854     hess [9*n+2] *= dxInv*dzInv;
0855     hess [9*n+5] *= dyInv*dzInv;
0856     // Copy hessian elements into lower half of 3x3 matrix
0857     hess [9*n+3] = hess[9*n+1];
0858     hess [9*n+6] = hess[9*n+2];
0859     hess [9*n+7] = hess[9*n+5];
0860   }
0861 }
0862 
0863 inline void
0864 eval_multi_UBspline_3d_s_vghgh (multi_UBspline_3d_s *spline,
0865                double x, double y, double z,
0866                float* restrict vals,
0867                float* restrict grads,
0868                float* restrict hess,
0869                float* restrict gradhess)    
0870 {
0871   x -= spline->x_grid.start;
0872   y -= spline->y_grid.start;
0873   z -= spline->z_grid.start;
0874   float ux = x*spline->x_grid.delta_inv;
0875   float uy = y*spline->y_grid.delta_inv;
0876   float uz = z*spline->z_grid.delta_inv;
0877   float ipartx, iparty, ipartz, tx, ty, tz;
0878   tx = modff (ux, &ipartx);  int ix = (int) ipartx;
0879   ty = modff (uy, &iparty);  int iy = (int) iparty;
0880   tz = modff (uz, &ipartz);  int iz = (int) ipartz;
0881   
0882   float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], 
0883     da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4],
0884     d3a[4], d3b[4], d3c[4];
0885   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
0886   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
0887   tpz[0] = tz*tz*tz;  tpz[1] = tz*tz;  tpz[2] = tz;  tpz[3] = 1.0;
0888   float* restrict coefs = spline->coefs;
0889 
0890   a[0]  = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
0891   a[1]  = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
0892   a[2]  = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
0893   a[3]  = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
0894   da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
0895   da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
0896   da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
0897   da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
0898   d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]);
0899   d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]);
0900   d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]);
0901   d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]);
0902   d3a[0] = (/*d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] +*/ d3Af[ 3]*tpx[3]);
0903   d3a[1] = (/*d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] +*/ d3Af[ 7]*tpx[3]);
0904   d3a[2] = (/*d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] +*/ d3Af[11]*tpx[3]);
0905   d3a[3] = (/*d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] +*/ d3Af[15]*tpx[3]);
0906   
0907   b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
0908   b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
0909   b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
0910   b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
0911   db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]);
0912   db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]);
0913   db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]);
0914   db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]);
0915   d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]);
0916   d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]);
0917   d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]);
0918   d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]);
0919   d3b[0] = (/*d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] +*/ d3Af[ 3]*tpy[3]);
0920   d3b[1] = (/*d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] +*/ d3Af[ 7]*tpy[3]);
0921   d3b[2] = (/*d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] +*/ d3Af[11]*tpy[3]);
0922   d3b[3] = (/*d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] +*/ d3Af[15]*tpy[3]);
0923 
0924   c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]);
0925   c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]);
0926   c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]);
0927   c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]);
0928   dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]);
0929   dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]);
0930   dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]);
0931   dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]);
0932   d2c[0] = (d2Af[ 0]*tpz[0] + d2Af[ 1]*tpz[1] + d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]);
0933   d2c[1] = (d2Af[ 4]*tpz[0] + d2Af[ 5]*tpz[1] + d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]);
0934   d2c[2] = (d2Af[ 8]*tpz[0] + d2Af[ 9]*tpz[1] + d2Af[10]*tpz[2] + d2Af[11]*tpz[3]);
0935   d2c[3] = (d2Af[12]*tpz[0] + d2Af[13]*tpz[1] + d2Af[14]*tpz[2] + d2Af[15]*tpz[3]);
0936   d3c[0] = (/*d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] +*/ d3Af[ 3]*tpz[3]);
0937   d3c[1] = (/*d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] +*/ d3Af[ 7]*tpz[3]);
0938   d3c[2] = (/*d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] +*/ d3Af[11]*tpz[3]);
0939   d3c[3] = (/*d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] +*/ d3Af[15]*tpz[3]);
0940 
0941   int xs = spline->x_stride;
0942   int ys = spline->y_stride;
0943   int zs = spline->z_stride;
0944 
0945   for (int n=0; n<spline->num_splines; n++) {
0946     vals[n] = 0.0;
0947     grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
0948     for (int i=0; i<9; i++)
0949       hess[9*n+i] = 0.0;
0950     for (int i=0; i<27; i++)
0951       gradhess[27*n+i] = 0.0;
0952   }
0953 
0954   for (int i=0; i<4; i++)
0955     for (int j=0; j<4; j++) 
0956       for (int k=0; k<4; k++) {
0957    float abc = a[i]*b[j]*c[k];
0958    float dabc[3], d2abc[6], d3abc[10];
0959    dabc[0] = da[i]* b[j]* c[k];
0960    dabc[1] =  a[i]*db[j]* c[k];
0961    dabc[2] =  a[i]* b[j]*dc[k];
0962    d2abc[0] = d2a[i]*  b[j]*  c[k];
0963    d2abc[1] =  da[i]* db[j]*  c[k];
0964    d2abc[2] =  da[i]*  b[j]* dc[k];
0965    d2abc[3] =   a[i]*d2b[j]*  c[k];
0966    d2abc[4] =   a[i]* db[j]* dc[k];
0967    d2abc[5] =   a[i]*  b[j]*d2c[k];
0968    
0969    d3abc[0] = d3a[i]*  b[j]*  c[k];
0970    d3abc[1] = d2a[i]* db[j]*  c[k];
0971    d3abc[2] = d2a[i]*  b[j]* dc[k];
0972    d3abc[3] =  da[i]*d2b[j]*  c[k];
0973    d3abc[4] =  da[i]* db[j]* dc[k];
0974    d3abc[5] =  da[i]*  b[j]*d2c[k];   
0975    d3abc[6] =   a[i]*d3b[j]*  c[k];
0976    d3abc[7] =   a[i]*d2b[j]* dc[k];
0977    d3abc[8] =   a[i]* db[j]*d2c[k];     
0978    d3abc[9] =   a[i]*  b[j]*d3c[k];
0979 
0980    float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
0981    for (int n=0; n<spline->num_splines; n++) {
0982      vals[n]      +=   abc   *coefs[n];
0983      grads[3*n+0] +=  dabc[0]*coefs[n];
0984      grads[3*n+1] +=  dabc[1]*coefs[n];
0985      grads[3*n+2] +=  dabc[2]*coefs[n];
0986      hess [9*n+0] += d2abc[0]*coefs[n];
0987      hess [9*n+1] += d2abc[1]*coefs[n];
0988      hess [9*n+2] += d2abc[2]*coefs[n];
0989      hess [9*n+4] += d2abc[3]*coefs[n];
0990      hess [9*n+5] += d2abc[4]*coefs[n];
0991      hess [9*n+8] += d2abc[5]*coefs[n];
0992      
0993      gradhess [27*n+0 ] += d3abc[0]*coefs[n];
0994      gradhess [27*n+1 ] += d3abc[1]*coefs[n];
0995      gradhess [27*n+2 ] += d3abc[2]*coefs[n];
0996      gradhess [27*n+4 ] += d3abc[3]*coefs[n];
0997      gradhess [27*n+5 ] += d3abc[4]*coefs[n];
0998      gradhess [27*n+8 ] += d3abc[5]*coefs[n];
0999      gradhess [27*n+13] += d3abc[6]*coefs[n];
1000      gradhess [27*n+14] += d3abc[7]*coefs[n];
1001      gradhess [27*n+17] += d3abc[8]*coefs[n];
1002      gradhess [27*n+26] += d3abc[9]*coefs[n];
1003    }
1004       }
1005 
1006   float dxInv = spline->x_grid.delta_inv;
1007   float dyInv = spline->y_grid.delta_inv;
1008   float dzInv = spline->z_grid.delta_inv; 
1009   for (int n=0; n<spline->num_splines; n++) {
1010     grads[3*n+0] *= dxInv;
1011     grads[3*n+1] *= dyInv;
1012     grads[3*n+2] *= dzInv;
1013     hess [9*n+0] *= dxInv*dxInv;
1014     hess [9*n+4] *= dyInv*dyInv;
1015     hess [9*n+8] *= dzInv*dzInv;
1016     hess [9*n+1] *= dxInv*dyInv;
1017     hess [9*n+2] *= dxInv*dzInv;
1018     hess [9*n+5] *= dyInv*dzInv;
1019     // Copy hessian elements into lower half of 3x3 matrix
1020     hess [9*n+3] = hess[9*n+1];
1021     hess [9*n+6] = hess[9*n+2];
1022     hess [9*n+7] = hess[9*n+5];
1023     
1024     gradhess [27*n+0 ] *= dxInv*dxInv*dxInv;
1025     gradhess [27*n+1 ] *= dxInv*dxInv*dyInv;
1026     gradhess [27*n+2 ] *= dxInv*dxInv*dzInv;
1027     gradhess [27*n+4 ] *= dxInv*dyInv*dyInv;
1028     gradhess [27*n+5 ] *= dxInv*dyInv*dzInv;
1029     gradhess [27*n+8 ] *= dxInv*dzInv*dzInv;
1030     gradhess [27*n+13] *= dyInv*dyInv*dyInv;
1031     gradhess [27*n+14] *= dyInv*dyInv*dzInv; 
1032     gradhess [27*n+17] *= dyInv*dzInv*dzInv;
1033     gradhess [27*n+26] *= dzInv*dzInv*dzInv;
1034     
1035     // Copy gradhess elements into rest of tensor
1036     gradhess [27*n+9  ] = gradhess [27*n+3  ] = gradhess [27*n+1 ];
1037     gradhess [27*n+18 ] = gradhess [27*n+6  ] = gradhess [27*n+2 ];
1038     gradhess [27*n+22 ] = gradhess [27*n+16 ] = gradhess [27*n+14];
1039     gradhess [27*n+12 ] = gradhess [27*n+10 ] = gradhess [27*n+4 ];
1040     gradhess [27*n+24 ] = gradhess [27*n+20 ] = gradhess [27*n+8 ];
1041     gradhess [27*n+25 ] = gradhess [27*n+23 ] = gradhess [27*n+17];
1042     gradhess [27*n+21 ] = gradhess [27*n+19 ] = gradhess [27*n+15] = gradhess [27*n+11 ] = gradhess [27*n+7 ] = gradhess [27*n+5];
1043 
1044   }
1045 }
1046 
1047 #endif