File indexing completed on 2024-12-22 04:09:52

0001 /////////////////////////////////////////////////////////////////////////////
0002 //  einspline:  a library for creating and evaluating B-splines            //
0003 //  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
0004 //                                                                         //
0005 //  This program is free software; you can redistribute it and/or modify   //
0006 //  it under the terms of the GNU General Public License as published by   //
0007 //  the Free Software Foundation; either version 2 of the License, or      //
0008 //  (at your option) any later version.                                    //
0009 //                                                                         //
0010 //  This program is distributed in the hope that it will be useful,        //
0011 //  but WITHOUT ANY WARRANTY; without even the implied warranty of         //
0012 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
0013 //  GNU General Public License for more details.                           //
0014 //                                                                         //
0015 //  You should have received a copy of the GNU General Public License      //
0016 //  along with this program; if not, write to the Free Software            //
0017 //  Foundation, Inc., 51 Franklin Street, Fifth Floor,                     //
0018 //  Boston, MA  02110-1301  USA                                            //
0019 /////////////////////////////////////////////////////////////////////////////
0020 
0021 #include "nubasis.h"
0022 #include <stdlib.h>
0023 
0024   
0025 
0026 NUBasis*
0027 create_NUBasis (NUgrid *grid, bool periodic)
0028 {
0029   NUBasis* restrict basis = new NUBasis;
0030   basis->grid = grid;
0031   basis->periodic = periodic;
0032   int N = grid->num_points;
0033   basis->xVals = new double[N+5];
0034   basis->dxInv = new double[3*(N+2)];
0035   for (int i=0; i<N; i++)
0036     basis->xVals[i+2] = grid->points[i];
0037   double*  restrict g = grid->points;
0038   // Extend grid points on either end to provide enough points to
0039   // construct a full basis set
0040   if (!periodic) {
0041     basis->xVals[0]   = g[ 0 ] - 2.0*(g[1]-g[0]);
0042     basis->xVals[1]   = g[ 0 ] - 1.0*(g[1]-g[0]);
0043     basis->xVals[N+2] = g[N-1] + 1.0*(g[N-1]-g[N-2]);
0044     basis->xVals[N+3] = g[N-1] + 2.0*(g[N-1]-g[N-2]);
0045     basis->xVals[N+4] = g[N-1] + 3.0*(g[N-1]-g[N-2]);
0046   }
0047   else {
0048     basis->xVals[1]   = g[ 0 ] - (g[N-1] - g[N-2]);
0049     basis->xVals[0]   = g[ 0 ] - (g[N-1] - g[N-3]);
0050     basis->xVals[N+2] = g[N-1] + (g[ 1 ] - g[ 0 ]);
0051     basis->xVals[N+3] = g[N-1] + (g[ 2 ] - g[ 0 ]);
0052     basis->xVals[N+4] = g[N-1] + (g[ 3 ] - g[ 0 ]);
0053   }
0054   for (int i=0; i<N+2; i++) 
0055     for (int j=0; j<3; j++) 
0056       basis->dxInv[3*i+j] = 
0057     1.0/(basis->xVals[i+j+1]-basis->xVals[i]);
0058   return basis;
0059 }
0060 
0061 void
0062 destroy_NUBasis (NUBasis *basis)
0063 {
0064   delete[] (basis->xVals);
0065   delete[] (basis->dxInv);
0066   delete (basis);
0067 }
0068 
0069 
0070 int
0071 get_NUBasis_funcs_s (NUBasis* restrict basis, double x,
0072              float bfuncs[4])
0073 {
0074   double b1[2], b2[3];
0075   int i = (*basis->grid->reverse_map)(basis->grid, x);
0076   int i2 = i+2;
0077   double* restrict dxInv = basis->dxInv;
0078   double* restrict xVals = basis->xVals;
0079 
0080   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0081   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0082   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0083   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0084            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0085   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0086   bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0087   bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0088            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0089   bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0090            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0091   bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2];
0092   return i;
0093 }
0094 
0095 
0096 void
0097 get_NUBasis_funcs_si (NUBasis* restrict basis, int i,
0098              float bfuncs[4])
0099 {
0100   int i2 = i+2;
0101   double b1[2], b2[3];
0102   double x = basis->grid->points[i];
0103   double* restrict dxInv = basis->dxInv;
0104   double* restrict xVals = basis->xVals; 
0105 
0106   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0107   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0108   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0109   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0110            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0111   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0112   bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0113   bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0114            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0115   bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0116            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0117   bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2];
0118 }
0119 
0120 int
0121 get_NUBasis_dfuncs_s (NUBasis* restrict basis, double x,
0122               float bfuncs[4], float dbfuncs[4])
0123 {
0124   double b1[2], b2[3];
0125   int i = (*basis->grid->reverse_map)(basis->grid, x);
0126   int i2 = i+2;
0127   double* restrict dxInv = basis->dxInv;
0128   double* restrict xVals = basis->xVals;
0129 
0130   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0131   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0132   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0133   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0134            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0135   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0136   bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0137   bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0138            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0139   bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0140            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0141   bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
0142 
0143   dbfuncs[0] = -3.0f * (dxInv[3*(i  )+2] * b2[0]);
0144   dbfuncs[1] =  3.0f * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
0145   dbfuncs[2] =  3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
0146   dbfuncs[3] =  3.0f * (dxInv[3*(i+2)+2] * b2[2]);
0147 
0148   return i;
0149 }
0150 
0151 
0152 void
0153 get_NUBasis_dfuncs_si (NUBasis* restrict basis, int i,
0154                float bfuncs[4], float dbfuncs[4])
0155 {
0156   double b1[2], b2[3];
0157   double x = basis->grid->points[i];
0158   int i2 = i+2;
0159   double* restrict dxInv = basis->dxInv;
0160   double* restrict xVals = basis->xVals;
0161 
0162   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0163   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0164   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0165   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0166            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0167   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0168   bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0169   bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0170            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0171   bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0172            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0173   bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
0174 
0175   dbfuncs[0] = -3.0f * (dxInv[3*(i  )+2] * b2[0]);
0176   dbfuncs[1] =  3.0f * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
0177   dbfuncs[2] =  3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
0178   dbfuncs[3] =  3.0f * (dxInv[3*(i+2)+2] * b2[2]);
0179 }
0180 
0181 
0182 int
0183 get_NUBasis_d2funcs_s (NUBasis* restrict basis, double x,
0184                float bfuncs[4], float dbfuncs[4], float d2bfuncs[4])
0185 {
0186   double b1[2], b2[3];
0187   int i = (*basis->grid->reverse_map)(basis->grid, x);
0188   int i2 = i+2;
0189   double* restrict dxInv = basis->dxInv;
0190   double* restrict xVals = basis->xVals;
0191 
0192   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0193   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0194   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0195   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0196            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0197   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0198   bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0199   bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0200            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0201   bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0202            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0203   bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
0204 
0205   dbfuncs[0] = -3.0f * (dxInv[3*(i  )+2] * b2[0]);
0206   dbfuncs[1] =  3.0f * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
0207   dbfuncs[2] =  3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
0208   dbfuncs[3] =  3.0f * (dxInv[3*(i+2)+2] * b2[2]);
0209 
0210   d2bfuncs[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
0211   d2bfuncs[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
0212                 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
0213   d2bfuncs[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
0214                 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
0215   d2bfuncs[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
0216 
0217   return i;
0218 }
0219 
0220 
0221 void
0222 get_NUBasis_d2funcs_si (NUBasis* restrict basis, int i,
0223             float bfuncs[4], float dbfuncs[4], float d2bfuncs[4])
0224 {
0225   double b1[2], b2[3];
0226   double x = basis->grid->points[i];
0227   int i2 = i+2;
0228   double* restrict dxInv = basis->dxInv;
0229   double* restrict xVals = basis->xVals;
0230 
0231   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0232   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0233   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0234   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0235            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0236   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0237   bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0238   bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0239            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0240   bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0241            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0242   bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
0243 
0244   dbfuncs[0] = -3.0f * (dxInv[3*(i  )+2] * b2[0]);
0245   dbfuncs[1] =  3.0f * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
0246   dbfuncs[2] =  3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
0247   dbfuncs[3] =  3.0f * (dxInv[3*(i+2)+2] * b2[2]);
0248 
0249   d2bfuncs[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
0250   d2bfuncs[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
0251                 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
0252   d2bfuncs[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
0253                 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
0254   d2bfuncs[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
0255 }
0256 
0257 
0258 //////////////////////////////
0259 // Double-precision version //
0260 //////////////////////////////
0261 int
0262 get_NUBasis_funcs_d (NUBasis* restrict basis, double x,
0263              double bfuncs[4])
0264 {
0265   double b1[2], b2[3];
0266   int i = (*basis->grid->reverse_map)(basis->grid, x);
0267   int i2 = i+2;
0268   double* restrict dxInv = basis->dxInv;
0269   double* restrict xVals = basis->xVals;
0270 
0271   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0272   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0273   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0274   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0275            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0276   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0277   bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0278   bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0279            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0280   bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0281            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0282   bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2];
0283   return i;
0284 }
0285 
0286 
0287 void
0288 get_NUBasis_funcs_di (NUBasis* restrict basis, int i,
0289               double bfuncs[4])
0290 {
0291   int i2 = i+2;
0292   double b1[2], b2[3];
0293   double x = basis->grid->points[i];
0294   double* restrict dxInv = basis->dxInv;
0295   double* restrict xVals = basis->xVals; 
0296 
0297   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0298   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0299   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0300   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0301            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0302   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0303   bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0304   bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0305            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0306   bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0307            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0308   bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2];
0309 }
0310 
0311 int
0312 get_NUBasis_dfuncs_d (NUBasis* restrict basis, double x,
0313               double bfuncs[4], double dbfuncs[4])
0314 {
0315   double b1[2], b2[3];
0316   int i = (*basis->grid->reverse_map)(basis->grid, x);
0317   int i2 = i+2;
0318   double* restrict dxInv = basis->dxInv;
0319   double* restrict xVals = basis->xVals;
0320 
0321   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0322   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0323   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0324   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0325            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0326   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0327   bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0328   bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0329            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0330   bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0331            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0332   bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
0333 
0334   dbfuncs[0] = -3.0f * (dxInv[3*(i  )+2] * b2[0]);
0335   dbfuncs[1] =  3.0f * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
0336   dbfuncs[2] =  3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
0337   dbfuncs[3] =  3.0f * (dxInv[3*(i+2)+2] * b2[2]);
0338 
0339   return i;
0340 }
0341 
0342 
0343 void
0344 get_NUBasis_dfuncs_di (NUBasis* restrict basis, int i,
0345                double bfuncs[4], double dbfuncs[4])
0346 {
0347   double b1[2], b2[3];
0348   double x = basis->grid->points[i];
0349   int i2 = i+2;
0350   double* restrict dxInv = basis->dxInv;
0351   double* restrict xVals = basis->xVals;
0352 
0353   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0354   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0355   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0356   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0357            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0358   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0359   bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0360   bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0361            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0362   bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0363            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0364   bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
0365 
0366   dbfuncs[0] = -3.0f * (dxInv[3*(i  )+2] * b2[0]);
0367   dbfuncs[1] =  3.0f * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
0368   dbfuncs[2] =  3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
0369   dbfuncs[3] =  3.0f * (dxInv[3*(i+2)+2] * b2[2]);
0370 }
0371 
0372 
0373 int
0374 get_NUBasis_d2funcs_d (NUBasis* restrict basis, double x,
0375                double bfuncs[4], double dbfuncs[4], double d2bfuncs[4])
0376 {
0377   double b1[2], b2[3];
0378   int i = (*basis->grid->reverse_map)(basis->grid, x);
0379   int i2 = i+2;
0380   double* restrict dxInv = basis->dxInv;
0381   double* restrict xVals = basis->xVals;
0382 
0383   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0384   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0385   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0386   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0387            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0388   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0389   bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0390   bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0391            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0392   bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0393            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0394   bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
0395 
0396   dbfuncs[0] = -3.0f * (dxInv[3*(i  )+2] * b2[0]);
0397   dbfuncs[1] =  3.0f * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
0398   dbfuncs[2] =  3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
0399   dbfuncs[3] =  3.0f * (dxInv[3*(i+2)+2] * b2[2]);
0400 
0401   d2bfuncs[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
0402   d2bfuncs[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
0403                 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
0404   d2bfuncs[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
0405                 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
0406   d2bfuncs[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
0407 
0408   return i;
0409 }
0410 
0411 
0412 void
0413 get_NUBasis_d2funcs_di (NUBasis* restrict basis, int i,
0414             double bfuncs[4], double dbfuncs[4], 
0415             double d2bfuncs[4])
0416 {
0417   double b1[2], b2[3];
0418   double x = basis->grid->points[i];
0419   int i2 = i+2;
0420   double* restrict dxInv = basis->dxInv;
0421   double* restrict xVals = basis->xVals;
0422 
0423   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0424   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0425   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0426   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0427            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0428   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0429   bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0430   bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0431            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0432   bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0433            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0434   bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
0435 
0436   dbfuncs[0] = -3.0f * (dxInv[3*(i  )+2] * b2[0]);
0437   dbfuncs[1] =  3.0f * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
0438   dbfuncs[2] =  3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
0439   dbfuncs[3] =  3.0f * (dxInv[3*(i+2)+2] * b2[2]);
0440 
0441   d2bfuncs[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
0442   d2bfuncs[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
0443                 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
0444   d2bfuncs[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
0445                 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
0446   d2bfuncs[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
0447 }
0448 
0449 
0450 #ifdef HAVE_SSE2
0451 typedef union
0452 {
0453   float s[4];
0454   __m128 v;
0455 } uvec4;
0456 
0457 typedef union
0458 {
0459   double s[2];
0460   __m128d v;
0461 } uvec2;
0462 
0463 int
0464 get_NUBasis_funcs_sse_s (NUBasis* restrict basis, double x,
0465              __m128 *restrict funcs)
0466 {
0467   double b1[2], b2[3];
0468   int i = (*basis->grid->reverse_map)(basis->grid, x);
0469   int i2 = i+2;
0470   double* restrict dxInv = basis->dxInv;
0471   double* restrict xVals = basis->xVals;
0472   
0473   uvec4 bfuncs;
0474   
0475 
0476   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0477   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0478   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0479   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0480            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0481   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0482   bfuncs.s[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0483   bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0484            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0485   bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0486            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0487   bfuncs.s[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2];
0488   *funcs = bfuncs.v;
0489   return i;
0490 }
0491 
0492 int
0493 get_NUBasis_dfuncs_sse_s (NUBasis* restrict basis, double x,
0494               __m128 *restrict funcs, __m128 *restrict dfuncs)
0495 {
0496   double b1[2], b2[3];
0497   int i = (*basis->grid->reverse_map)(basis->grid, x);
0498   int i2 = i+2;
0499   double* restrict dxInv = basis->dxInv;
0500   double* restrict xVals = basis->xVals;
0501   uvec4 bfuncs, dbfuncs;
0502 
0503 
0504   b1[0]       = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0505   b1[1]       = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0506   b2[0]       = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0507   b2[1]       = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0508          (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0509   b2[2]       = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0510   bfuncs.s[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0511   bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0512          (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0513   bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0514          (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0515   bfuncs.s[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
0516 
0517   dbfuncs.s[0] = -3.0f * (dxInv[3*(i  )+2] * b2[0]);
0518   dbfuncs.s[1] =  3.0f * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
0519   dbfuncs.s[2] =  3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
0520   dbfuncs.s[3] =  3.0f * (dxInv[3*(i+2)+2] * b2[2]);
0521 
0522   *funcs  =  bfuncs.v;
0523   *dfuncs = dbfuncs.v;
0524 
0525   return i;
0526 }
0527 
0528 int
0529 get_NUBasis_d2funcs_sse_s (NUBasis* restrict basis, double x,
0530                __m128 *restrict funcs, __m128 *restrict dfuncs, __m128 *restrict d2funcs)
0531 {
0532   double b1[2], b2[3];
0533   int i = (*basis->grid->reverse_map)(basis->grid, x);
0534   int i2 = i+2;
0535   double* restrict dxInv = basis->dxInv;
0536   double* restrict xVals = basis->xVals;
0537   uvec4 bfuncs, dbfuncs, d2bfuncs;
0538 
0539   b1[0]       = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0540   b1[1]       = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0541   b2[0]       = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0542   b2[1]       = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0543          (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0544   b2[2]       = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0545   bfuncs.s[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0546   bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0547          (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0548   bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0549          (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0550   bfuncs.s[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
0551 
0552   dbfuncs.s[0]  = -3.0f * (dxInv[3*(i  )+2] * b2[0]);
0553   dbfuncs.s[1]  =  3.0f * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
0554   dbfuncs.s[2]  =  3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
0555   dbfuncs.s[3]  =  3.0f * (dxInv[3*(i+2)+2] * b2[2]);
0556 
0557   d2bfuncs.s[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
0558   d2bfuncs.s[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
0559              dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
0560   d2bfuncs.s[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
0561              dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
0562   d2bfuncs.s[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
0563 
0564   *funcs   =   bfuncs.v;
0565   *dfuncs  =  dbfuncs.v;
0566   *d2funcs = d2bfuncs.v;
0567 
0568   return i;
0569 }
0570 
0571 
0572 //////////////////////////////
0573 // Double-precision version //
0574 //////////////////////////////
0575 int
0576 get_NUBasis_funcs_sse_d (NUBasis* restrict basis, double x,
0577               __m128d *restrict   f01, __m128d *restrict   f23)
0578 {
0579   double b1[2], b2[3];
0580   int i = (*basis->grid->reverse_map)(basis->grid, x);
0581   int i2 = i+2;
0582   double* restrict dxInv = basis->dxInv;
0583   double* restrict xVals = basis->xVals;
0584   uvec2 bf01, bf23, dbf01, dbf23;
0585 
0586   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0587   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0588   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0589   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0590            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0591   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0592   bf01.s[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0593   bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0594            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0595   bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0596            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0597   bf23.s[1] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
0598 
0599   *f01   =   bf01.v;  *f23 =   bf23.v;
0600   return i;
0601 }
0602 
0603 int
0604 get_NUBasis_dfuncs_sse_d (NUBasis* restrict basis, double x,
0605               __m128d *restrict   f01, __m128d *restrict   f23,
0606               __m128d *restrict  df01, __m128d *restrict  df23)
0607 
0608 {
0609   double b1[2], b2[3];
0610   int i = (*basis->grid->reverse_map)(basis->grid, x);
0611   int i2 = i+2;
0612   double* restrict dxInv = basis->dxInv;
0613   double* restrict xVals = basis->xVals;
0614   uvec2 bf01, bf23, dbf01, dbf23;
0615 
0616   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0617   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0618   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0619   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0620            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0621   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0622   bf01.s[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0623   bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0624            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0625   bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0626            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0627   bf23.s[1] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
0628 
0629   dbf01.s[0] = -3.0f * (dxInv[3*(i  )+2] * b2[0]);
0630   dbf01.s[1] =  3.0f * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
0631   dbf23.s[0] =  3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
0632   dbf23.s[1] =  3.0f * (dxInv[3*(i+2)+2] * b2[2]);
0633 
0634   *f01   =   bf01.v;   *f23 =   bf23.v;
0635   *df01  =  dbf01.v;  *df23 =  dbf23.v;
0636 
0637   return i;
0638 }
0639 
0640 int
0641 get_NUBasis_d2funcs_sse_d (NUBasis* restrict basis, double x,
0642                __m128d *restrict   f01, __m128d *restrict   f23,
0643                __m128d *restrict  df01, __m128d *restrict  df23,
0644                __m128d *restrict d2f01, __m128d *restrict d2f23)
0645 {
0646   double b1[2], b2[3];
0647   int i = (*basis->grid->reverse_map)(basis->grid, x);
0648   int i2 = i+2;
0649   double* restrict dxInv = basis->dxInv;
0650   double* restrict xVals = basis->xVals;
0651   uvec2 bf01, bf23, dbf01, dbf23, d2bf01, d2bf23;
0652   
0653   b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
0654   b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
0655   b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
0656   b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
0657            (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
0658   b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
0659   bf01.s[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
0660   bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
0661            (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
0662   bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
0663            (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
0664   bf23.s[1] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
0665   
0666   dbf01.s[0] = -3.0f * (dxInv[3*(i  )+2] * b2[0]);
0667   dbf01.s[1] =  3.0f * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
0668   dbf23.s[0] =  3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
0669   dbf23.s[1] =  3.0f * (dxInv[3*(i+2)+2] * b2[2]);
0670   
0671   d2bf01.s[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
0672   d2bf01.s[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
0673                dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
0674   d2bf23.s[0] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
0675                dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
0676   d2bf23.s[1] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
0677   
0678   *f01   =   bf01.v;    *f23 =   bf23.v;
0679   *df01  =  dbf01.v;   *df23 =  dbf23.v;
0680   *d2f01 = d2bf01.v;  *d2f23 = d2bf23.v;
0681   
0682   return i;
0683 }
0684 
0685 #endif