File indexing completed on 2024-12-22 04:09:37

0001 /////////////////////////////////////////////////////////////////////////////
0002 //  einspline:  a library for creating and evaluating B-splines            //
0003 //  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
0004 //                                                                         //
0005 //  This program is free software; you can redistribute it and/or modify   //
0006 //  it under the terms of the GNU General Public License as published by   //
0007 //  the Free Software Foundation; either version 2 of the License, or      //
0008 //  (at your option) any later version.                                    //
0009 //                                                                         //
0010 //  This program is distributed in the hope that it will be useful,        //
0011 //  but WITHOUT ANY WARRANTY; without even the implied warranty of         //
0012 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
0013 //  GNU General Public License for more details.                           //
0014 //                                                                         //
0015 //  You should have received a copy of the GNU General Public License      //
0016 //  along with this program; if not, write to the Free Software            //
0017 //  Foundation, Inc., 51 Franklin Street, Fifth Floor,                     //
0018 //  Boston, MA  02110-1301  USA                                            //
0019 /////////////////////////////////////////////////////////////////////////////
0020 
0021 #include "bspline_create.h"
0022 #ifndef _XOPEN_SOURCE
0023 #define _XOPEN_SOURCE 600
0024 #endif
0025 #ifndef __USE_XOPEN2K
0026   #define __USE_XOPEN2K
0027 #endif
0028 #include <stdlib.h>
0029 #include <stdio.h>
0030 #include <vector>
0031 
0032 int posix_memalign(void **memptr, size_t alignment, size_t size);
0033 
0034 ////////////////////////////////////////////////////////////
0035 ////////////////////////////////////////////////////////////
0036 ////       Helper functions for spline creation         ////
0037 ////////////////////////////////////////////////////////////
0038 ////////////////////////////////////////////////////////////
0039 void init_sse_data();
0040 
0041 void
0042 find_coefs_1d_d (Ugrid grid, BCtype_d bc, 
0043          double *data,  intptr_t dstride,
0044          double *coefs, intptr_t cstride);
0045 
0046 void 
0047 solve_deriv_interp_1d_s (float bands[], float coefs[],
0048              int M, int cstride)
0049 {
0050   // Solve interpolating equations
0051   // First and last rows are different
0052   bands[4*(0)+1] /= bands[4*(0)+0];
0053   bands[4*(0)+2] /= bands[4*(0)+0];
0054   bands[4*(0)+3] /= bands[4*(0)+0];
0055   bands[4*(0)+0] = 1.0;
0056   bands[4*(1)+1] -= bands[4*(1)+0]*bands[4*(0)+1];
0057   bands[4*(1)+2] -= bands[4*(1)+0]*bands[4*(0)+2];
0058   bands[4*(1)+3] -= bands[4*(1)+0]*bands[4*(0)+3];
0059   bands[4*(0)+0] = 0.0;
0060   bands[4*(1)+2] /= bands[4*(1)+1];
0061   bands[4*(1)+3] /= bands[4*(1)+1];
0062   bands[4*(1)+1] = 1.0;
0063   
0064   // Now do rows 2 through M+1
0065   for (int row=2; row < (M+1); row++) {
0066     bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2];
0067     bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3];
0068     bands[4*(row)+2] /= bands[4*(row)+1];
0069     bands[4*(row)+3] /= bands[4*(row)+1];
0070     bands[4*(row)+0] = 0.0;
0071     bands[4*(row)+1] = 1.0;
0072   }
0073 
0074   // Do last row
0075   bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2];
0076   bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3];
0077   bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2];
0078   bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3];
0079   bands[4*(M+1)+3] /= bands[4*(M+1)+2];
0080   bands[4*(M+1)+2] = 1.0;
0081 
0082   coefs[(M+1)*cstride] = bands[4*(M+1)+3];
0083   // Now back substitute up
0084   for (int row=M; row>0; row--)
0085     coefs[row*cstride] = bands[4*(row)+3] - bands[4*(row)+2]*coefs[cstride*(row+1)];
0086   
0087   // Finish with first row
0088   coefs[0] = bands[4*(0)+3] - bands[4*(0)+1]*coefs[1*cstride] - bands[4*(0)+2]*coefs[2*cstride];
0089 }
0090 
0091 // On input, bands should be filled with:
0092 // row 0   :  abcdInitial from boundary conditions
0093 // rows 1:M:  basis functions in first 3 cols, data in last
0094 // row M+1 :  abcdFinal   from boundary conditions
0095 // cstride gives the stride between values in coefs.
0096 // On exit, coefs with contain interpolating B-spline coefs
0097 void 
0098 solve_periodic_interp_1d_s (float bands[], float coefs[],
0099                 int M, int cstride)
0100 {
0101   std::vector<float> lastCol(M);
0102 
0103   // Now solve:
0104   // First and last rows are different
0105   bands[4*(0)+2] /= bands[4*(0)+1];
0106   bands[4*(0)+0] /= bands[4*(0)+1];
0107   bands[4*(0)+3] /= bands[4*(0)+1];
0108   bands[4*(0)+1]  = 1.0;
0109   bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
0110   bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
0111   bands[4*(M-1)+2]  = -bands[4*(M-1)+2]*bands[4*(0)+2];
0112   lastCol[0] = bands[4*(0)+0];
0113   
0114   for (int row=1; row < (M-1); row++) {
0115     bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
0116     bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
0117     lastCol[row]   = -bands[4*(row)+0] * lastCol[row-1];
0118     bands[4*(row)+0] = 0.0;
0119     bands[4*(row)+2] /= bands[4*(row)+1];
0120     bands[4*(row)+3] /= bands[4*(row)+1];
0121     lastCol[row]  /= bands[4*(row)+1];
0122     bands[4*(row)+1]  = 1.0;
0123     if (row < (M-2)) {
0124       bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
0125       bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
0126       bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
0127     }
0128   }
0129 
0130   // Now do last row
0131   // The [2] element and [0] element are now on top of each other 
0132   bands[4*(M-1)+0] += bands[4*(M-1)+2];
0133   bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
0134   bands[4*(M-1)+3] -= bands[4*(M-1)+0] *  bands[4*(M-2)+3];
0135   bands[4*(M-1)+3] /= bands[4*(M-1)+1];
0136   coefs[M*cstride] = bands[4*(M-1)+3];
0137   for (int row=M-2; row>=0; row--) 
0138     coefs[(row+1)*cstride] = 
0139       bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride];
0140   
0141   coefs[0*cstride] = coefs[M*cstride];
0142   coefs[(M+1)*cstride] = coefs[1*cstride];
0143   coefs[(M+2)*cstride] = coefs[2*cstride];
0144 
0145 
0146 }
0147 
0148 
0149 // On input, bands should be filled with:
0150 // row 0   :  abcdInitial from boundary conditions
0151 // rows 1:M:  basis functions in first 3 cols, data in last
0152 // row M+1 :  abcdFinal   from boundary conditions
0153 // cstride gives the stride between values in coefs.
0154 // On exit, coefs with contain interpolating B-spline coefs
0155 void 
0156 solve_antiperiodic_interp_1d_s (float bands[], float coefs[],
0157                 int M, int cstride)
0158 {
0159   bands[4*0+0]     *= -1.0;
0160   bands[4*(M-1)+2] *= -1.0;
0161 
0162   std::vector<float> lastCol(M);
0163 
0164   // Now solve:
0165   // First and last rows are different
0166   bands[4*(0)+2] /= bands[4*(0)+1];
0167   bands[4*(0)+0] /= bands[4*(0)+1];
0168   bands[4*(0)+3] /= bands[4*(0)+1];
0169   bands[4*(0)+1]  = 1.0;
0170   bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
0171   bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
0172   bands[4*(M-1)+2]  = -bands[4*(M-1)+2]*bands[4*(0)+2];
0173   lastCol[0] = bands[4*(0)+0];
0174   
0175   for (int row=1; row < (M-1); row++) {
0176     bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
0177     bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
0178     lastCol[row]   = -bands[4*(row)+0] * lastCol[row-1];
0179     bands[4*(row)+0] = 0.0;
0180     bands[4*(row)+2] /= bands[4*(row)+1];
0181     bands[4*(row)+3] /= bands[4*(row)+1];
0182     lastCol[row]  /= bands[4*(row)+1];
0183     bands[4*(row)+1]  = 1.0;
0184     if (row < (M-2)) {
0185       bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
0186       bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
0187       bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
0188     }
0189   }
0190 
0191   // Now do last row
0192   // The [2] element and [0] element are now on top of each other 
0193   bands[4*(M-1)+0] += bands[4*(M-1)+2];
0194   bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
0195   bands[4*(M-1)+3] -= bands[4*(M-1)+0] *  bands[4*(M-2)+3];
0196   bands[4*(M-1)+3] /= bands[4*(M-1)+1];
0197   coefs[M*cstride] = bands[4*(M-1)+3];
0198   for (int row=M-2; row>=0; row--) 
0199     coefs[(row+1)*cstride] = 
0200       bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride];
0201   
0202   coefs[0*cstride]     = -coefs[M*cstride];
0203   coefs[(M+1)*cstride] = -coefs[1*cstride];
0204   coefs[(M+2)*cstride] = -coefs[2*cstride];
0205 
0206 
0207 }
0208 
0209 
0210 
0211 
0212 #ifdef HIGH_PRECISION
0213 void
0214 find_coefs_1d_s (Ugrid grid, BCtype_s bc, 
0215          float *data,  intptr_t dstride,
0216          float *coefs, intptr_t cstride)
0217 {
0218   BCtype_d d_bc;
0219   double *d_data, *d_coefs;
0220 
0221   d_bc.lCode = bc.lCode;   d_bc.rCode = bc.rCode;
0222   d_bc.lVal  = bc.lVal;    d_bc.rVal  = bc.rVal;
0223   int M = grid.num, N;
0224   if (bc.lCode == PERIODIC || bc.lCode == ANTIPERIODIC)    N = M+3;
0225   else                         N = M+2;
0226 
0227   d_data  = new double[N];
0228   d_coefs = new double[N];
0229   for (int i=0; i<M; i++)
0230     d_data[i] = data[i*dstride];
0231   find_coefs_1d_d (grid, d_bc, d_data, 1, d_coefs, 1);
0232   for (int i=0; i<N; i++)
0233     coefs[i*cstride] = d_coefs[i];
0234   delete[] d_data;
0235   delete[] d_coefs;
0236 }
0237 
0238 #else
0239 void
0240 find_coefs_1d_s (Ugrid grid, BCtype_s bc, 
0241          float *data,  intptr_t dstride,
0242          float *coefs, intptr_t cstride)
0243 {
0244   int M = grid.num;
0245   float basis[4] = {1.0/6.0, 2.0/3.0, 1.0/6.0, 0.0};
0246   if (bc.lCode == PERIODIC || bc.lCode == ANTIPERIODIC) {
0247 
0248     float *bands = new float[4*M];
0249 
0250     for (int i=0; i<M; i++) {
0251       bands[4*i+0] = basis[0];
0252       bands[4*i+1] = basis[1];
0253       bands[4*i+2] = basis[2];
0254       bands[4*i+3] = data[i*dstride];
0255     }
0256     if (bc.lCode == PERIODIC)
0257       solve_periodic_interp_1d_s (bands, coefs, M, cstride);
0258     else
0259       solve_antiperiodic_interp_1d_s (bands, coefs, M, cstride);
0260 
0261     delete[] bands;
0262   }
0263   else {
0264     // Setup boundary conditions
0265     float abcd_left[4];
0266     float abcd_right[4];
0267     for (int i = 0; i < 4; i++) {
0268         abcd_left[i] = 0.0;
0269         abcd_right[i] = 0.0;
0270     }
0271 
0272     // Left boundary
0273     if (bc.lCode == FLAT || bc.lCode == NATURAL)
0274       bc.lVal = 0.0;
0275     if (bc.lCode == FLAT || bc.lCode == DERIV1) {
0276       abcd_left[0] = -0.5 * grid.delta_inv;
0277       abcd_left[1] =  0.0 * grid.delta_inv; 
0278       abcd_left[2] =  0.5 * grid.delta_inv;
0279       abcd_left[3] =  bc.lVal;
0280     }
0281     if (bc.lCode == NATURAL || bc.lCode == DERIV2) {
0282       abcd_left[0] = 1.0 * grid.delta_inv * grid.delta_inv;
0283       abcd_left[1] =-2.0 * grid.delta_inv * grid.delta_inv;
0284       abcd_left[2] = 1.0 * grid.delta_inv * grid.delta_inv;
0285       abcd_left[3] = bc.lVal;
0286     }
0287     
0288     // Right boundary
0289     if (bc.rCode == FLAT || bc.rCode == NATURAL)
0290       bc.rVal = 0.0;
0291     if (bc.rCode == FLAT || bc.rCode == DERIV1) {
0292       abcd_right[0] = -0.5 * grid.delta_inv;
0293       abcd_right[1] =  0.0 * grid.delta_inv; 
0294       abcd_right[2] =  0.5 * grid.delta_inv;
0295       abcd_right[3] =  bc.rVal;
0296     }
0297     if (bc.rCode == NATURAL || bc.rCode == DERIV2) {
0298       abcd_right[0] = 1.0 *grid.delta_inv * grid.delta_inv;
0299       abcd_right[1] =-2.0 *grid.delta_inv * grid.delta_inv;
0300       abcd_right[2] = 1.0 *grid.delta_inv * grid.delta_inv;
0301       abcd_right[3] = bc.rVal;
0302     }
0303 
0304     float *bands = new float[4*(M+2)];
0305     for (int i=0; i<4; i++) {
0306       bands[4*( 0 )+i]   = abcd_left[i];
0307       bands[4*(M+1)+i] = abcd_right[i];
0308     }
0309     for (int i=0; i<M; i++) {
0310       for (int j=0; j<3; j++)
0311     bands[4*(i+1)+j] = basis[j];
0312       bands[4*(i+1)+3] = data[i*dstride];
0313     }   
0314     // Now, solve for coefficients
0315     solve_deriv_interp_1d_s (bands, coefs, M, cstride);
0316     delete[] bands;
0317   }
0318 }
0319 
0320 #endif
0321 
0322 ////////////////////////////////////////////////////////////
0323 ////////////////////////////////////////////////////////////
0324 ////     Single-Precision, Real Creation Routines       ////
0325 ////////////////////////////////////////////////////////////
0326 ////////////////////////////////////////////////////////////
0327 
0328 // On input, bands should be filled with:
0329 // row 0   :  abcdInitial from boundary conditions
0330 // rows 1:M:  basis functions in first 3 cols, data in last
0331 // row M+1 :  abcdFinal   from boundary conditions
0332 // cstride gives the stride between values in coefs.
0333 // On exit, coefs with contain interpolating B-spline coefs
0334 UBspline_1d_s*
0335 create_UBspline_1d_s (Ugrid x_grid, BCtype_s xBC, float *data)
0336 {
0337   // Create new spline
0338   UBspline_1d_s* restrict spline = static_cast<UBspline_1d_s*>(malloc(sizeof(UBspline_1d_s)));
0339   spline->spcode = U1D;
0340   spline->tcode  = SINGLE_REAL;
0341   spline->xBC = xBC; spline->x_grid = x_grid;
0342 
0343   // Setup internal variables
0344   int M = x_grid.num;
0345   int N;
0346 
0347   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
0348     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num);
0349     N = M+3;
0350   }
0351   else {
0352     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
0353     N = M+2;
0354   }
0355 
0356   x_grid.delta_inv = 1.0/x_grid.delta;
0357   spline->x_grid   = x_grid;
0358 #ifndef HAVE_SSE2
0359   spline->coefs = (float*)malloc (sizeof(float)*N);
0360 #else
0361   posix_memalign ((void**)&spline->coefs, 16, (sizeof(float)*N));
0362 #endif
0363   find_coefs_1d_s (spline->x_grid, xBC, data, 1, spline->coefs, 1);
0364 
0365   init_sse_data();    
0366   return spline;
0367 }
0368 
0369 void
0370 recompute_UBspline_1d_s (UBspline_1d_s* spline, float *data)
0371 {
0372   find_coefs_1d_s (spline->x_grid, spline->xBC, data, 1, spline->coefs, 1);
0373 }
0374 
0375 
0376 UBspline_2d_s*
0377 create_UBspline_2d_s (Ugrid x_grid, Ugrid y_grid,
0378               BCtype_s xBC, BCtype_s yBC, float *data)
0379 {
0380     // Create new spline
0381   UBspline_2d_s* restrict spline = static_cast<UBspline_2d_s*>(malloc(sizeof(UBspline_2d_s)));
0382   spline->spcode = U2D;
0383   spline->tcode  = SINGLE_REAL;
0384   spline->xBC = xBC; 
0385   spline->yBC = yBC; 
0386   // Setup internal variables
0387   int Mx = x_grid.num;
0388   int My = y_grid.num;
0389   int Nx, Ny;
0390 
0391   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
0392   else                           Nx = Mx+2;
0393   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
0394   x_grid.delta_inv = 1.0/x_grid.delta;
0395   spline->x_grid   = x_grid;
0396 
0397   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     Ny = My+3;
0398   else                           Ny = My+2;
0399   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
0400   y_grid.delta_inv = 1.0/y_grid.delta;
0401   spline->y_grid   = y_grid;
0402   spline->x_stride = Ny;
0403 #ifndef HAVE_SSE2
0404   spline->coefs = (float*)malloc (sizeof(float)*Nx*Ny);
0405 #else
0406   posix_memalign ((void**)&spline->coefs, 16, sizeof(float)*Nx*Ny);
0407 #endif
0408 
0409   // First, solve in the X-direction 
0410   for (int iy=0; iy<My; iy++) {
0411     intptr_t doffset = iy;
0412     intptr_t coffset = iy;
0413     find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, My,
0414              spline->coefs+coffset, Ny);
0415   }
0416   
0417   // Now, solve in the Y-direction
0418   for (int ix=0; ix<Nx; ix++) {
0419     intptr_t doffset = ix*Ny;
0420     intptr_t coffset = ix*Ny;
0421     find_coefs_1d_s (spline->y_grid, spline->yBC, spline->coefs+doffset, 1, 
0422              spline->coefs+coffset, 1);
0423   }
0424   init_sse_data();
0425   return spline;
0426 }
0427 
0428 void
0429 recompute_UBspline_2d_s (UBspline_2d_s* spline, float *data)
0430 {
0431   int Mx = spline->x_grid.num;
0432   int My = spline->y_grid.num;
0433   int Nx, Ny;
0434 
0435   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
0436   else                           Nx = Mx+2;
0437   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     Ny = My+3;
0438   else                           Ny = My+2;
0439 
0440   // First, solve in the X-direction 
0441   for (int iy=0; iy<My; iy++) {
0442     intptr_t doffset = iy;
0443     intptr_t coffset = iy;
0444     find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, My,
0445              spline->coefs+coffset, Ny);
0446   }
0447   
0448   // Now, solve in the Y-direction
0449   for (int ix=0; ix<Nx; ix++) {
0450     intptr_t doffset = ix*Ny;
0451     intptr_t coffset = ix*Ny;
0452     find_coefs_1d_s (spline->y_grid, spline->yBC, spline->coefs+doffset, 1, 
0453              spline->coefs+coffset, 1);
0454   }
0455 }
0456 
0457 
0458 UBspline_3d_s*
0459 create_UBspline_3d_s (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
0460               BCtype_s xBC, BCtype_s yBC, BCtype_s zBC,
0461               float *data)
0462 {
0463   // Create new spline
0464   UBspline_3d_s* restrict spline = static_cast<UBspline_3d_s*>(malloc(sizeof(UBspline_3d_s)));
0465   spline->spcode = U3D;
0466   spline->tcode  = SINGLE_REAL;
0467   spline->xBC = xBC; 
0468   spline->yBC = yBC; 
0469   spline->zBC = zBC; 
0470   // Setup internal variables
0471   int Mx = x_grid.num;  int My = y_grid.num; int Mz = z_grid.num;
0472   int Nx, Ny, Nz;
0473 
0474   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
0475   else                           Nx = Mx+2;
0476   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
0477   x_grid.delta_inv = 1.0/x_grid.delta;
0478   spline->x_grid   = x_grid;
0479 
0480   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     Ny = My+3;
0481   else                           Ny = My+2;
0482   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
0483   y_grid.delta_inv = 1.0/y_grid.delta;
0484   spline->y_grid   = y_grid;
0485 
0486   if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC)     Nz = Mz+3;
0487   else                           Nz = Mz+2;
0488   z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
0489   z_grid.delta_inv = 1.0/z_grid.delta;
0490   spline->z_grid   = z_grid;
0491 
0492   spline->x_stride = Ny*Nz;
0493   spline->y_stride = Nz;
0494 
0495 #ifndef HAVE_SSE2
0496   spline->coefs      = new float[Nx*Ny*Nz];
0497 #else
0498   posix_memalign ((void**)&spline->coefs, 16, (sizeof(float)*Nx*Ny*Nz));
0499 #endif
0500 
0501   // First, solve in the X-direction 
0502   for (int iy=0; iy<My; iy++) 
0503     for (int iz=0; iz<Mz; iz++) {
0504       intptr_t doffset = iy*Mz+iz;
0505       intptr_t coffset = iy*Nz+iz;
0506       find_coefs_1d_s (spline->x_grid, xBC, data+doffset, My*Mz,
0507                spline->coefs+coffset, Ny*Nz);
0508     }
0509   
0510   // Now, solve in the Y-direction
0511   for (int ix=0; ix<Nx; ix++) 
0512     for (int iz=0; iz<Nz; iz++) {
0513       intptr_t doffset = ix*Ny*Nz + iz;
0514       intptr_t coffset = ix*Ny*Nz + iz;
0515       find_coefs_1d_s (spline->y_grid, yBC, spline->coefs+doffset, Nz, 
0516                spline->coefs+coffset, Nz);
0517     }
0518 
0519   // Now, solve in the Z-direction
0520   for (int ix=0; ix<Nx; ix++) 
0521     for (int iy=0; iy<Ny; iy++) {
0522       intptr_t doffset = (ix*Ny+iy)*Nz;
0523       intptr_t coffset = (ix*Ny+iy)*Nz;
0524       find_coefs_1d_s (spline->z_grid, zBC, spline->coefs+doffset, 1, 
0525                spline->coefs+coffset, 1);
0526     }
0527   init_sse_data();
0528   return spline;
0529 }
0530 
0531 void
0532 recompute_UBspline_3d_s (UBspline_3d_s* spline, float *data)
0533 {
0534   int Mx = spline->x_grid.num;
0535   int My = spline->y_grid.num;
0536   int Mz = spline->z_grid.num;
0537   int Nx, Ny, Nz;
0538 
0539   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
0540   else                           Nx = Mx+2;
0541   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     Ny = My+3;
0542   else                           Ny = My+2;
0543   if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC)     Nz = Mz+3;
0544   else                           Nz = Mz+2;
0545 
0546   // First, solve in the X-direction 
0547   for (int iy=0; iy<My; iy++) 
0548     for (int iz=0; iz<Mz; iz++) {
0549       intptr_t doffset = iy*Mz+iz;
0550       intptr_t coffset = iy*Nz+iz;
0551       find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, My*Mz,
0552                spline->coefs+coffset, Ny*Nz);
0553     }
0554   
0555   // Now, solve in the Y-direction
0556   for (int ix=0; ix<Nx; ix++) 
0557     for (int iz=0; iz<Nz; iz++) {
0558       intptr_t doffset = ix*Ny*Nz + iz;
0559       intptr_t coffset = ix*Ny*Nz + iz;
0560       find_coefs_1d_s (spline->y_grid, spline->yBC, spline->coefs+doffset, Nz, 
0561                spline->coefs+coffset, Nz);
0562     }
0563 
0564   // Now, solve in the Z-direction
0565   for (int ix=0; ix<Nx; ix++) 
0566     for (int iy=0; iy<Ny; iy++) {
0567       intptr_t doffset = (ix*Ny+iy)*Nz;
0568       intptr_t coffset = (ix*Ny+iy)*Nz;
0569       find_coefs_1d_s (spline->z_grid, spline->zBC, spline->coefs+doffset, 1, 
0570                spline->coefs+coffset, 1);
0571     }
0572 }
0573 
0574 
0575 ////////////////////////////////////////////////////////////
0576 ////////////////////////////////////////////////////////////
0577 ////    Single-Precision, Complex Creation Routines     ////
0578 ////////////////////////////////////////////////////////////
0579 ////////////////////////////////////////////////////////////
0580 
0581 // On input, bands should be filled with:
0582 // row 0   :  abcdInitial from boundary conditions
0583 // rows 1:M:  basis functions in first 3 cols, data in last
0584 // row M+1 :  abcdFinal   from boundary conditions
0585 // cstride gives the stride between values in coefs.
0586 // On exit, coefs with contain interpolating B-spline coefs
0587 UBspline_1d_c*
0588 create_UBspline_1d_c (Ugrid x_grid, BCtype_c xBC, complex_float *data)
0589 {
0590   // Create new spline
0591   UBspline_1d_c* restrict spline = static_cast<UBspline_1d_c*>(malloc(sizeof(UBspline_1d_c)));
0592   spline->spcode = U1D;
0593   spline->tcode  = SINGLE_COMPLEX;
0594   spline->xBC = xBC; 
0595   // Setup internal variables
0596   int M = x_grid.num;
0597   int N;
0598 
0599   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
0600     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num);
0601     N = M+3;
0602   }
0603   else {
0604     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
0605     N = M+2;
0606   }
0607 
0608   x_grid.delta_inv = 1.0/x_grid.delta;
0609   spline->x_grid   = x_grid;
0610 #ifndef HAVE_SSE2
0611   spline->coefs = (complex_float*)malloc (2*sizeof(float)*N);
0612 #else
0613   posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*N);
0614 #endif
0615 
0616   BCtype_s xBC_r, xBC_i;
0617   xBC_r.lCode = xBC.lCode;  xBC_r.rCode = xBC.rCode;
0618   xBC_r.lVal  = xBC.lVal_r; xBC_r.rVal  = xBC.rVal_r;
0619   xBC_i.lCode = xBC.lCode;  xBC_i.rCode = xBC.rCode;
0620   xBC_i.lVal  = xBC.lVal_i; xBC_i.rVal  = xBC.rVal_i;
0621   // Real part
0622   find_coefs_1d_s (spline->x_grid, xBC_r, 
0623            (float*)data, 2, (float*)spline->coefs, 2);
0624   // Imaginarty part
0625   find_coefs_1d_s (spline->x_grid, xBC_i, 
0626            ((float*)data)+1, 2, ((float*)spline->coefs+1), 2);
0627 
0628   init_sse_data();    
0629   return spline;
0630 }
0631 
0632 void
0633 recompute_UBspline_1d_c (UBspline_1d_c* spline, complex_float *data)
0634 {
0635   
0636   BCtype_s xBC_r, xBC_i;
0637   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
0638   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
0639   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
0640   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
0641 
0642   // Real part
0643   find_coefs_1d_s (spline->x_grid, xBC_r, 
0644            (float*)data, 2, (float*)spline->coefs, 2);
0645   // Imaginarty part
0646   find_coefs_1d_s (spline->x_grid, xBC_i, 
0647            ((float*)data)+1, 2, ((float*)spline->coefs+1), 2);
0648 }
0649 
0650 
0651 
0652 UBspline_2d_c*
0653 create_UBspline_2d_c (Ugrid x_grid, Ugrid y_grid,
0654               BCtype_c xBC, BCtype_c yBC, complex_float *data)
0655 {
0656   // Create new spline
0657   UBspline_2d_c* restrict spline = static_cast<UBspline_2d_c*>(malloc(sizeof(UBspline_2d_c)));
0658   spline->spcode = U2D;
0659   spline->tcode  = SINGLE_COMPLEX;
0660   spline->xBC = xBC; 
0661   spline->yBC = yBC; 
0662 
0663   // Setup internal variables
0664   int Mx = x_grid.num;
0665   int My = y_grid.num;
0666   int Nx, Ny;
0667 
0668   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
0669   else                           Nx = Mx+2;
0670   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
0671   x_grid.delta_inv = 1.0/x_grid.delta;
0672   spline->x_grid   = x_grid;
0673 
0674   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     Ny = My+3;
0675   else                           Ny = My+2;
0676   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
0677   y_grid.delta_inv = 1.0/y_grid.delta;
0678   spline->y_grid   = y_grid;
0679   spline->x_stride = Ny;
0680 
0681 #ifndef HAVE_SSE2
0682   spline->coefs = (complex_float*)malloc (2*sizeof(float)*Nx*Ny);
0683 #else
0684   posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*Nx*Ny);
0685 #endif
0686 
0687   BCtype_s xBC_r, xBC_i, yBC_r, yBC_i;
0688   xBC_r.lCode = xBC.lCode;  xBC_r.rCode = xBC.rCode;
0689   xBC_r.lVal  = xBC.lVal_r; xBC_r.rVal  = xBC.rVal_r;
0690   xBC_i.lCode = xBC.lCode;  xBC_i.rCode = xBC.rCode;
0691   xBC_i.lVal  = xBC.lVal_i; xBC_i.rVal  = xBC.rVal_i;
0692   yBC_r.lCode = yBC.lCode;  yBC_r.rCode = yBC.rCode;
0693   yBC_r.lVal  = yBC.lVal_r; yBC_r.rVal  = yBC.rVal_r;
0694   yBC_i.lCode = yBC.lCode;  yBC_i.rCode = yBC.rCode;
0695   yBC_i.lVal  = yBC.lVal_i; yBC_i.rVal  = yBC.rVal_i;
0696   // First, solve in the X-direction 
0697   for (int iy=0; iy<My; iy++) {
0698     intptr_t doffset = 2*iy;
0699     intptr_t coffset = 2*iy;
0700     // Real part
0701     find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My,
0702              (float*)spline->coefs+coffset, 2*Ny);
0703     // Imag part
0704     find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My,
0705              ((float*)spline->coefs)+coffset+1, 2*Ny);
0706   }
0707   
0708   // Now, solve in the Y-direction
0709   for (int ix=0; ix<Nx; ix++) {
0710     intptr_t doffset = 2*ix*Ny;
0711     intptr_t coffset = 2*ix*Ny;
0712     // Real part
0713     find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2, 
0714              ((float*)spline->coefs)+coffset, 2);
0715     // Imag part
0716     find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2, 
0717              ((float*)spline->coefs)+coffset+1, 2);
0718   }
0719   init_sse_data();
0720   return spline;
0721 }
0722 
0723 
0724 void
0725 recompute_UBspline_2d_c (UBspline_2d_c* spline, complex_float *data)
0726 {
0727   // Setup internal variables
0728   int Mx = spline->x_grid.num;
0729   int My = spline->y_grid.num;
0730   int Nx, Ny;
0731 
0732   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     
0733     Nx = Mx+3;
0734   else                           
0735     Nx = Mx+2;
0736   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     
0737     Ny = My+3;
0738   else                           
0739     Ny = My+2;
0740 
0741   BCtype_s xBC_r, xBC_i, yBC_r, yBC_i;
0742   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
0743   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
0744   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
0745   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
0746   yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
0747   yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
0748   yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
0749   yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
0750  
0751   // First, solve in the X-direction 
0752   for (int iy=0; iy<My; iy++) {
0753     intptr_t doffset = 2*iy;
0754     intptr_t coffset = 2*iy;
0755     // Real part
0756     find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My,
0757              (float*)spline->coefs+coffset, 2*Ny);
0758     // Imag part
0759     find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My,
0760              ((float*)spline->coefs)+coffset+1, 2*Ny);
0761   }
0762   
0763   // Now, solve in the Y-direction
0764   for (int ix=0; ix<Nx; ix++) {
0765     intptr_t doffset = 2*ix*Ny;
0766     intptr_t coffset = 2*ix*Ny;
0767     // Real part
0768     find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2, 
0769              ((float*)spline->coefs)+coffset, 2);
0770     // Imag part
0771     find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2, 
0772              ((float*)spline->coefs)+coffset+1, 2);
0773   }  
0774 }
0775 
0776 UBspline_3d_c*
0777 create_UBspline_3d_c (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
0778               BCtype_c xBC, BCtype_c yBC, BCtype_c zBC,
0779               complex_float *data)
0780 {
0781   // Create new spline
0782   UBspline_3d_c* restrict spline = static_cast<UBspline_3d_c*>(malloc(sizeof(UBspline_3d_c)));
0783   spline->spcode = U3D;
0784   spline->tcode  = SINGLE_COMPLEX;
0785   spline->xBC = xBC; 
0786   spline->yBC = yBC; 
0787   spline->zBC = zBC; 
0788 
0789   // Setup internal variables
0790   int Mx = x_grid.num;  int My = y_grid.num; int Mz = z_grid.num;
0791   int Nx, Ny, Nz;
0792 
0793   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
0794   else                           Nx = Mx+2;
0795   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
0796   x_grid.delta_inv = 1.0/x_grid.delta;
0797   spline->x_grid   = x_grid;
0798 
0799   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     Ny = My+3;
0800   else                           Ny = My+2;
0801   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
0802   y_grid.delta_inv = 1.0/y_grid.delta;
0803   spline->y_grid   = y_grid;
0804 
0805   if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC)     Nz = Mz+3;
0806   else                           Nz = Mz+2;
0807   z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
0808   z_grid.delta_inv = 1.0/z_grid.delta;
0809   spline->z_grid   = z_grid;
0810 
0811   spline->x_stride = Ny*Nz;
0812   spline->y_stride = Nz;
0813 
0814 #ifndef HAVE_SSE2
0815   spline->coefs      =  new complex_float[Nx*Ny*Nz];
0816 #else
0817   posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*Nx*Ny*Nz);
0818 #endif
0819 
0820   BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
0821   xBC_r.lCode = xBC.lCode;  xBC_r.rCode = xBC.rCode;
0822   xBC_r.lVal  = xBC.lVal_r; xBC_r.rVal  = xBC.rVal_r;
0823   xBC_i.lCode = xBC.lCode;  xBC_i.rCode = xBC.rCode;
0824   xBC_i.lVal  = xBC.lVal_i; xBC_i.rVal  = xBC.rVal_i;
0825   yBC_r.lCode = yBC.lCode;  yBC_r.rCode = yBC.rCode;
0826   yBC_r.lVal  = yBC.lVal_r; yBC_r.rVal  = yBC.rVal_r;
0827   yBC_i.lCode = yBC.lCode;  yBC_i.rCode = yBC.rCode;
0828   yBC_i.lVal  = yBC.lVal_i; yBC_i.rVal  = yBC.rVal_i;
0829   zBC_r.lCode = zBC.lCode;  zBC_r.rCode = zBC.rCode;
0830   zBC_r.lVal  = zBC.lVal_r; zBC_r.rVal  = zBC.rVal_r;
0831   zBC_i.lCode = zBC.lCode;  zBC_i.rCode = zBC.rCode;
0832   zBC_i.lVal  = zBC.lVal_i; zBC_i.rVal  = zBC.rVal_i;
0833   // First, solve in the X-direction 
0834   for (int iy=0; iy<My; iy++) 
0835     for (int iz=0; iz<Mz; iz++) {
0836       intptr_t doffset = 2*(iy*Mz+iz);
0837       intptr_t coffset = 2*(iy*Nz+iz);
0838       // Real part
0839       find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My*Mz,
0840                ((float*)spline->coefs)+coffset, 2*Ny*Nz);
0841       // Imag part
0842       find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My*Mz,
0843                ((float*)spline->coefs)+coffset+1, 2*Ny*Nz);
0844     }
0845   
0846   // Now, solve in the Y-direction
0847   for (int ix=0; ix<Nx; ix++) 
0848     for (int iz=0; iz<Nz; iz++) {
0849       intptr_t doffset = 2*(ix*Ny*Nz + iz);
0850       intptr_t coffset = 2*(ix*Ny*Nz + iz);
0851       // Real part
0852       find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2*Nz, 
0853                ((float*)spline->coefs)+coffset, 2*Nz);
0854       // Imag part
0855       find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2*Nz, 
0856                ((float*)spline->coefs)+coffset+1, 2*Nz);
0857     }
0858 
0859   // Now, solve in the Z-direction
0860   for (int ix=0; ix<Nx; ix++) 
0861     for (int iy=0; iy<Ny; iy++) {
0862       intptr_t doffset = 2*((ix*Ny+iy)*Nz);
0863       intptr_t coffset = 2*((ix*Ny+iy)*Nz);
0864       // Real part
0865       find_coefs_1d_s (spline->z_grid, zBC_r, ((float*)spline->coefs)+doffset, 2, 
0866                ((float*)spline->coefs)+coffset, 2);
0867       // Imag part
0868       find_coefs_1d_s (spline->z_grid, zBC_i, ((float*)spline->coefs)+doffset+1, 2, 
0869                ((float*)spline->coefs)+coffset+1, 2);
0870     }
0871 
0872   init_sse_data();
0873   return spline;
0874 }
0875 
0876 void
0877 recompute_UBspline_3d_c (UBspline_3d_c* spline, complex_float *data)
0878 {
0879   // Setup internal variables
0880   int Mx = spline->x_grid.num;
0881   int My = spline->y_grid.num;
0882   int Mz = spline->z_grid.num;
0883   int Nx, Ny, Nz;
0884 
0885   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
0886   else                           Nx = Mx+2;
0887   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     Ny = My+3;
0888   else                           Ny = My+2;
0889   if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC)     Nz = Mz+3;
0890   else                           Nz = Mz+2;
0891 
0892   BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
0893   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
0894   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
0895   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
0896   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
0897   yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
0898   yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
0899   yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
0900   yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
0901   zBC_r.lCode = spline->zBC.lCode;  zBC_r.rCode = spline->zBC.rCode;
0902   zBC_r.lVal  = spline->zBC.lVal_r; zBC_r.rVal  = spline->zBC.rVal_r;
0903   zBC_i.lCode = spline->zBC.lCode;  zBC_i.rCode = spline->zBC.rCode;
0904   zBC_i.lVal  = spline->zBC.lVal_i; zBC_i.rVal  = spline->zBC.rVal_i;
0905   // First, solve in the X-direction 
0906   for (int iy=0; iy<My; iy++) 
0907     for (int iz=0; iz<Mz; iz++) {
0908       intptr_t doffset = 2*(iy*Mz+iz);
0909       intptr_t coffset = 2*(iy*Nz+iz);
0910       // Real part
0911       find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My*Mz,
0912                ((float*)spline->coefs)+coffset, 2*Ny*Nz);
0913       // Imag part
0914       find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My*Mz,
0915                ((float*)spline->coefs)+coffset+1, 2*Ny*Nz);
0916     }
0917   
0918   // Now, solve in the Y-direction
0919   for (int ix=0; ix<Nx; ix++) 
0920     for (int iz=0; iz<Nz; iz++) {
0921       intptr_t doffset = 2*(ix*Ny*Nz + iz);
0922       intptr_t coffset = 2*(ix*Ny*Nz + iz);
0923       // Real part
0924       find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2*Nz, 
0925                ((float*)spline->coefs)+coffset, 2*Nz);
0926       // Imag part
0927       find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2*Nz, 
0928                ((float*)spline->coefs)+coffset+1, 2*Nz);
0929     }
0930 
0931   // Now, solve in the Z-direction
0932   for (int ix=0; ix<Nx; ix++) 
0933     for (int iy=0; iy<Ny; iy++) {
0934       intptr_t doffset = 2*((ix*Ny+iy)*Nz);
0935       intptr_t coffset = 2*((ix*Ny+iy)*Nz);
0936       // Real part
0937       find_coefs_1d_s (spline->z_grid, zBC_r, ((float*)spline->coefs)+doffset, 2, 
0938                ((float*)spline->coefs)+coffset, 2);
0939       // Imag part
0940       find_coefs_1d_s (spline->z_grid, zBC_i, ((float*)spline->coefs)+doffset+1, 2, 
0941                ((float*)spline->coefs)+coffset+1, 2);
0942     }
0943 }
0944 
0945 
0946 ////////////////////////////////////////////////////////////
0947 ////////////////////////////////////////////////////////////
0948 ////     Double-Precision, Real Creation Routines       ////
0949 ////////////////////////////////////////////////////////////
0950 ////////////////////////////////////////////////////////////
0951 
0952 // On input, bands should be filled with:
0953 // row 0   :  abcdInitial from boundary conditions
0954 // rows 1:M:  basis functions in first 3 cols, data in last
0955 // row M+1 :  abcdFinal   from boundary conditions
0956 // cstride gives the stride between values in coefs.
0957 // On exit, coefs with contain interpolating B-spline coefs
0958 void 
0959 solve_deriv_interp_1d_d (double bands[], double coefs[],
0960              int M, int cstride)
0961 {
0962   // Solve interpolating equations
0963   // First and last rows are different
0964   bands[4*(0)+1] /= bands[4*(0)+0];
0965   bands[4*(0)+2] /= bands[4*(0)+0];
0966   bands[4*(0)+3] /= bands[4*(0)+0];
0967   bands[4*(0)+0] = 1.0;
0968   bands[4*(1)+1] -= bands[4*(1)+0]*bands[4*(0)+1];
0969   bands[4*(1)+2] -= bands[4*(1)+0]*bands[4*(0)+2];
0970   bands[4*(1)+3] -= bands[4*(1)+0]*bands[4*(0)+3];
0971   bands[4*(0)+0] = 0.0;
0972   bands[4*(1)+2] /= bands[4*(1)+1];
0973   bands[4*(1)+3] /= bands[4*(1)+1];
0974   bands[4*(1)+1] = 1.0;
0975   
0976   // Now do rows 2 through M+1
0977   for (int row=2; row < (M+1); row++) {
0978     bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2];
0979     bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3];
0980     bands[4*(row)+2] /= bands[4*(row)+1];
0981     bands[4*(row)+3] /= bands[4*(row)+1];
0982     bands[4*(row)+0] = 0.0;
0983     bands[4*(row)+1] = 1.0;
0984   }
0985 
0986   // Do last row
0987   bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2];
0988   bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3];
0989   bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2];
0990   bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3];
0991   bands[4*(M+1)+3] /= bands[4*(M+1)+2];
0992   bands[4*(M+1)+2] = 1.0;
0993 
0994   coefs[(M+1)*cstride] = bands[4*(M+1)+3];
0995   // Now back substitute up
0996   for (int row=M; row>0; row--)
0997     coefs[row*cstride] = bands[4*(row)+3] - bands[4*(row)+2]*coefs[cstride*(row+1)];
0998   
0999   // Finish with first row
1000   coefs[0] = bands[4*(0)+3] - bands[4*(0)+1]*coefs[1*cstride] - bands[4*(0)+2]*coefs[2*cstride];
1001 }
1002 
1003 // On input, bands should be filled with:
1004 // row 0   :  abcdInitial from boundary conditions
1005 // rows 1:M:  basis functions in first 3 cols, data in last
1006 // row M+1 :  abcdFinal   from boundary conditions
1007 // cstride gives the stride between values in coefs.
1008 // On exit, coefs with contain interpolating B-spline coefs
1009 void 
1010 solve_periodic_interp_1d_d (double bands[], double coefs[],
1011                 int M, int cstride)
1012 {
1013   std::vector<double> lastCol(M);
1014 
1015   // Now solve:
1016   // First and last rows are different
1017   bands[4*(0)+2] /= bands[4*(0)+1];
1018   bands[4*(0)+0] /= bands[4*(0)+1];
1019   bands[4*(0)+3] /= bands[4*(0)+1];
1020   bands[4*(0)+1]  = 1.0;
1021   bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
1022   bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
1023   bands[4*(M-1)+2]  = -bands[4*(M-1)+2]*bands[4*(0)+2];
1024   lastCol[0] = bands[4*(0)+0];
1025   
1026   for (int row=1; row < (M-1); row++) {
1027     bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
1028     bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
1029     lastCol[row]   = -bands[4*(row)+0] * lastCol[row-1];
1030     bands[4*(row)+0] = 0.0;
1031     bands[4*(row)+2] /= bands[4*(row)+1];
1032     bands[4*(row)+3] /= bands[4*(row)+1];
1033     lastCol[row]  /= bands[4*(row)+1];
1034     bands[4*(row)+1]  = 1.0;
1035     if (row < (M-2)) {
1036       bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
1037       bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
1038       bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
1039     }
1040   }
1041 
1042   // Now do last row
1043   // The [2] element and [0] element are now on top of each other 
1044   bands[4*(M-1)+0] += bands[4*(M-1)+2];
1045   bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
1046   bands[4*(M-1)+3] -= bands[4*(M-1)+0] *  bands[4*(M-2)+3];
1047   bands[4*(M-1)+3] /= bands[4*(M-1)+1];
1048   coefs[M*cstride] = bands[4*(M-1)+3];
1049   for (int row=M-2; row>=0; row--) 
1050     coefs[(row+1)*cstride] = 
1051       bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride];
1052   
1053   coefs[0*cstride] = coefs[M*cstride];
1054   coefs[(M+1)*cstride] = coefs[1*cstride];
1055   coefs[(M+2)*cstride] = coefs[2*cstride];
1056 }
1057 
1058 // On input, bands should be filled with:
1059 // row 0   :  abcdInitial from boundary conditions
1060 // rows 1:M:  basis functions in first 3 cols, data in last
1061 // row M+1 :  abcdFinal   from boundary conditions
1062 // cstride gives the stride between values in coefs.
1063 // On exit, coefs with contain interpolating B-spline coefs
1064 void 
1065 solve_antiperiodic_interp_1d_d (double bands[], double coefs[],
1066                 int M, int cstride)
1067 {
1068   std::vector<double> lastCol(M);
1069 
1070   bands[4*0+0]     *= -1.0;
1071   bands[4*(M-1)+2] *= -1.0;
1072   // Now solve:
1073   // First and last rows are different
1074   bands[4*(0)+2] /= bands[4*(0)+1];
1075   bands[4*(0)+0] /= bands[4*(0)+1];
1076   bands[4*(0)+3] /= bands[4*(0)+1];
1077   bands[4*(0)+1]  = 1.0;
1078   bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
1079   bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
1080   bands[4*(M-1)+2]  = -bands[4*(M-1)+2]*bands[4*(0)+2];
1081   lastCol[0] = bands[4*(0)+0];
1082   
1083   for (int row=1; row < (M-1); row++) {
1084     bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
1085     bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
1086     lastCol[row]   = -bands[4*(row)+0] * lastCol[row-1];
1087     bands[4*(row)+0] = 0.0;
1088     bands[4*(row)+2] /= bands[4*(row)+1];
1089     bands[4*(row)+3] /= bands[4*(row)+1];
1090     lastCol[row]  /= bands[4*(row)+1];
1091     bands[4*(row)+1]  = 1.0;
1092     if (row < (M-2)) {
1093       bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
1094       bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
1095       bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
1096     }
1097   }
1098 
1099   // Now do last row
1100   // The [2] element and [0] element are now on top of each other 
1101   bands[4*(M-1)+0] += bands[4*(M-1)+2];
1102   bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
1103   bands[4*(M-1)+3] -= bands[4*(M-1)+0] *  bands[4*(M-2)+3];
1104   bands[4*(M-1)+3] /= bands[4*(M-1)+1];
1105   coefs[M*cstride] = bands[4*(M-1)+3];
1106   for (int row=M-2; row>=0; row--) 
1107     coefs[(row+1)*cstride] = 
1108       bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride];
1109   
1110   coefs[0*cstride]     = -coefs[M*cstride];
1111   coefs[(M+1)*cstride] = -coefs[1*cstride];
1112   coefs[(M+2)*cstride] = -coefs[2*cstride];
1113 }
1114 
1115 
1116 
1117 void
1118 find_coefs_1d_d (Ugrid grid, BCtype_d bc, 
1119          double *data,  intptr_t dstride,
1120          double *coefs, intptr_t cstride)
1121 {
1122   int M = grid.num;
1123   double basis[4] = {1.0/6.0, 2.0/3.0, 1.0/6.0, 0.0};
1124   if (bc.lCode == PERIODIC || bc.lCode == ANTIPERIODIC) {
1125 #ifdef HAVE_C_VARARRAYS
1126     double bands[M*4];
1127 #else
1128     double *bands = new double[4*M];
1129 #endif
1130     for (int i=0; i<M; i++) {
1131       bands[4*i+0] = basis[0];
1132       bands[4*i+1] = basis[1];
1133       bands[4*i+2] = basis[2];
1134       bands[4*i+3] = data[i*dstride];
1135     }
1136     if (bc.lCode == ANTIPERIODIC) 
1137       solve_antiperiodic_interp_1d_d (bands, coefs, M, cstride);
1138     else
1139       solve_periodic_interp_1d_d (bands, coefs, M, cstride);
1140 
1141 
1142 #ifndef HAVE_C_VARARRAYS
1143     delete[] bands;
1144 #endif
1145   }
1146   else {
1147     // Setup boundary conditions
1148     double abcd_left[4];
1149     double abcd_right[4];
1150     for (int i = 0; i < 4; i++) {
1151         abcd_left[i] = 0.0;
1152         abcd_right[i] = 0.0;
1153     }
1154     // Left boundary
1155     if (bc.lCode == FLAT || bc.lCode == NATURAL)
1156       bc.lVal = 0.0;
1157     if (bc.lCode == FLAT || bc.lCode == DERIV1) {
1158       abcd_left[0] = -0.5 * grid.delta_inv;
1159       abcd_left[1] =  0.0 * grid.delta_inv; 
1160       abcd_left[2] =  0.5 * grid.delta_inv;
1161       abcd_left[3] =  bc.lVal;
1162     }
1163     if (bc.lCode == NATURAL || bc.lCode == DERIV2) {
1164       abcd_left[0] = 1.0 * grid.delta_inv * grid.delta_inv;
1165       abcd_left[1] =-2.0 * grid.delta_inv * grid.delta_inv;
1166       abcd_left[2] = 1.0 * grid.delta_inv * grid.delta_inv;
1167       abcd_left[3] = bc.lVal;
1168     }
1169     
1170     // Right boundary
1171     if (bc.rCode == FLAT || bc.rCode == NATURAL)
1172       bc.rVal = 0.0;
1173     if (bc.rCode == FLAT || bc.rCode == DERIV1) {
1174       abcd_right[0] = -0.5 * grid.delta_inv;
1175       abcd_right[1] =  0.0 * grid.delta_inv; 
1176       abcd_right[2] =  0.5 * grid.delta_inv;
1177       abcd_right[3] =  bc.rVal;
1178     }
1179     if (bc.rCode == NATURAL || bc.rCode == DERIV2) {
1180       abcd_right[0] = 1.0 *grid.delta_inv * grid.delta_inv;
1181       abcd_right[1] =-2.0 *grid.delta_inv * grid.delta_inv;
1182       abcd_right[2] = 1.0 *grid.delta_inv * grid.delta_inv;
1183       abcd_right[3] = bc.rVal;
1184     }
1185 #ifdef HAVE_C_VARARRAYS
1186     double bands[(M+2)*4];
1187 #else
1188     double *bands = new double[(M+2)*4];
1189 #endif
1190     for (int i=0; i<4; i++) {
1191       bands[4*( 0 )+i] = abcd_left[i];
1192       bands[4*(M+1)+i] = abcd_right[i];
1193     }
1194     for (int i=0; i<M; i++) {
1195       for (int j=0; j<3; j++)
1196     bands[4*(i+1)+j] = basis[j];
1197       bands[4*(i+1)+3] = data[i*dstride];
1198     }   
1199     // Now, solve for coefficients
1200     solve_deriv_interp_1d_d (bands, coefs, M, cstride);
1201 #ifndef HAVE_C_VARARRAYS
1202     delete[] bands;
1203 #endif
1204   }
1205 }
1206 
1207            
1208 
1209 UBspline_1d_d*
1210 create_UBspline_1d_d (Ugrid x_grid, BCtype_d xBC, double *data)
1211 {
1212   // Create new spline
1213   UBspline_1d_d* restrict spline = static_cast<UBspline_1d_d*>(malloc(sizeof(UBspline_1d_d)));
1214   spline->spcode = U1D;
1215   spline->tcode  = DOUBLE_REAL;
1216   spline->xBC = xBC; 
1217 
1218   // Setup internal variables
1219   int M = x_grid.num;
1220   int N;
1221 
1222   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
1223     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num);
1224     N = M+3;
1225   }
1226   else {
1227     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
1228     N = M+2;
1229   }
1230 
1231   x_grid.delta_inv = 1.0/x_grid.delta;
1232   spline->x_grid   = x_grid;
1233 
1234 #ifndef HAVE_SSE2
1235   spline->coefs = (double*)malloc (sizeof(double)*N);
1236 #else
1237   posix_memalign ((void**)&spline->coefs, 16, sizeof(double)*N);
1238 #endif
1239   find_coefs_1d_d (spline->x_grid, xBC, data, 1, spline->coefs, 1);
1240     
1241   init_sse_data();
1242   return spline;
1243 }
1244 
1245 void
1246 recompute_UBspline_1d_d (UBspline_1d_d* spline, double *data)
1247 {
1248   find_coefs_1d_d (spline->x_grid, spline->xBC, data, 1, spline->coefs, 1);
1249 }
1250 
1251 
1252 UBspline_2d_d*
1253 create_UBspline_2d_d (Ugrid x_grid, Ugrid y_grid,
1254               BCtype_d xBC, BCtype_d yBC, double *data)
1255 {
1256   // Create new spline
1257   UBspline_2d_d* restrict spline = static_cast<UBspline_2d_d*>(malloc(sizeof(UBspline_2d_d)));
1258   spline->spcode = U2D;
1259   spline->tcode  = DOUBLE_REAL;
1260   spline->xBC = xBC; 
1261   spline->yBC = yBC; 
1262  
1263   // Setup internal variables
1264   int Mx = x_grid.num;
1265   int My = y_grid.num;
1266   int Nx, Ny;
1267 
1268   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
1269   else                           Nx = Mx+2;
1270   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
1271   x_grid.delta_inv = 1.0/x_grid.delta;
1272   spline->x_grid   = x_grid;
1273 
1274   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     Ny = My+3;
1275   else                           Ny = My+2;
1276   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
1277   y_grid.delta_inv = 1.0/y_grid.delta;
1278   spline->y_grid   = y_grid;
1279   spline->x_stride = Ny;
1280 
1281 #ifndef HAVE_SSE2
1282   spline->coefs = (double*)malloc (sizeof(double)*Nx*Ny);
1283 #else
1284   posix_memalign ((void**)&spline->coefs, 16, (sizeof(double)*Nx*Ny));
1285 #endif
1286 
1287   // First, solve in the X-direction 
1288   for (int iy=0; iy<My; iy++) {
1289     intptr_t doffset = iy;
1290     intptr_t coffset = iy;
1291     find_coefs_1d_d (spline->x_grid, xBC, data+doffset, My,
1292              spline->coefs+coffset, Ny);
1293   }
1294   
1295   // Now, solve in the Y-direction
1296   for (int ix=0; ix<Nx; ix++) {
1297     intptr_t doffset = ix*Ny;
1298     intptr_t coffset = ix*Ny;
1299     find_coefs_1d_d (spline->y_grid, yBC, spline->coefs+doffset, 1, 
1300              spline->coefs+coffset, 1);
1301   }
1302 
1303   init_sse_data();
1304   return spline;
1305 }
1306 
1307 void
1308 recompute_UBspline_2d_d (UBspline_2d_d* spline, double *data)
1309 {
1310   int Mx = spline->x_grid.num;
1311   int My = spline->y_grid.num;
1312   int Nx, Ny;
1313 
1314   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
1315     Nx = Mx+3;
1316   else                                   
1317     Nx = Mx+2;
1318 
1319   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)
1320     Ny = My+3;
1321   else                                   
1322     Ny = My+2;
1323 
1324   // First, solve in the X-direction 
1325   for (int iy=0; iy<My; iy++) {
1326     intptr_t doffset = iy;
1327     intptr_t coffset = iy;
1328     find_coefs_1d_d (spline->x_grid, spline->xBC, data+doffset, My,
1329              spline->coefs+coffset, Ny);
1330   }
1331   
1332   // Now, solve in the Y-direction
1333   for (int ix=0; ix<Nx; ix++) {
1334     intptr_t doffset = ix*Ny;
1335     intptr_t coffset = ix*Ny;
1336     find_coefs_1d_d (spline->y_grid, spline->yBC, spline->coefs+doffset, 1, 
1337              spline->coefs+coffset, 1);
1338   }
1339 }
1340 
1341 
1342 UBspline_3d_d*
1343 create_UBspline_3d_d (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
1344               BCtype_d xBC, BCtype_d yBC, BCtype_d zBC,
1345               double *data)
1346 {
1347   // Create new spline
1348   UBspline_3d_d* restrict spline = static_cast<UBspline_3d_d*>(malloc(sizeof(UBspline_3d_d)));
1349   spline->spcode = U3D;
1350   spline->tcode  = DOUBLE_REAL;
1351   spline->xBC = xBC; 
1352   spline->yBC = yBC; 
1353   spline->zBC = zBC; 
1354 
1355   // Setup internal variables
1356   int Mx = x_grid.num;  int My = y_grid.num; int Mz = z_grid.num;
1357   int Nx, Ny, Nz;
1358 
1359   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
1360   else                           Nx = Mx+2;
1361   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
1362   x_grid.delta_inv = 1.0/x_grid.delta;
1363   spline->x_grid   = x_grid;
1364 
1365   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     Ny = My+3;
1366   else                           Ny = My+2;
1367   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
1368   y_grid.delta_inv = 1.0/y_grid.delta;
1369   spline->y_grid   = y_grid;
1370 
1371   if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC)     Nz = Mz+3;
1372   else                           Nz = Mz+2;
1373   z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
1374   z_grid.delta_inv = 1.0/z_grid.delta;
1375   spline->z_grid   = z_grid;
1376 
1377   spline->x_stride = Ny*Nz;
1378   spline->y_stride = Nz;
1379 
1380 #ifndef HAVE_SSE2
1381   spline->coefs      = new double[Nx*Ny*Nz];
1382 #else
1383   posix_memalign ((void**)&spline->coefs, 16, (sizeof(double)*Nx*Ny*Nz));
1384 #endif
1385 
1386   // First, solve in the X-direction 
1387   for (int iy=0; iy<My; iy++) 
1388     for (int iz=0; iz<Mz; iz++) {
1389       intptr_t doffset = iy*Mz+iz;
1390       intptr_t coffset = iy*Nz+iz;
1391       find_coefs_1d_d (spline->x_grid, xBC, data+doffset, My*Mz,
1392                spline->coefs+coffset, Ny*Nz);
1393     }
1394   
1395   // Now, solve in the Y-direction
1396   for (int ix=0; ix<Nx; ix++) 
1397     for (int iz=0; iz<Nz; iz++) {
1398       intptr_t doffset = ix*Ny*Nz + iz;
1399       intptr_t coffset = ix*Ny*Nz + iz;
1400       find_coefs_1d_d (spline->y_grid, yBC, spline->coefs+doffset, Nz, 
1401                spline->coefs+coffset, Nz);
1402     }
1403 
1404   // Now, solve in the Z-direction
1405   for (int ix=0; ix<Nx; ix++) 
1406     for (int iy=0; iy<Ny; iy++) {
1407       intptr_t doffset = (ix*Ny+iy)*Nz;
1408       intptr_t coffset = (ix*Ny+iy)*Nz;
1409       find_coefs_1d_d (spline->z_grid, zBC, spline->coefs+doffset, 1, 
1410                spline->coefs+coffset, 1);
1411     }
1412 
1413   init_sse_data();
1414   return spline;
1415 }
1416 
1417 void
1418 recompute_UBspline_3d_d (UBspline_3d_d* spline, double *data)
1419 {
1420   int Mx = spline->x_grid.num;  
1421   int My = spline->y_grid.num; 
1422   int Mz = spline->z_grid.num;
1423   int Nx, Ny, Nz;
1424 
1425   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     
1426     Nx = Mx+3;
1427   else                                   
1428     Nx = Mx+2;
1429   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     
1430     Ny = My+3;
1431   else                                   
1432     Ny = My+2;
1433   if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC)     
1434     Nz = Mz+3;
1435   else                                   
1436     Nz = Mz+2;
1437 
1438   // First, solve in the X-direction 
1439   for (int iy=0; iy<My; iy++) 
1440     for (int iz=0; iz<Mz; iz++) {
1441       intptr_t doffset = iy*Mz+iz;
1442       intptr_t coffset = iy*Nz+iz;
1443       find_coefs_1d_d (spline->x_grid, spline->xBC, data+doffset, My*Mz,
1444                spline->coefs+coffset, Ny*Nz);
1445     }
1446   
1447   // Now, solve in the Y-direction
1448   for (int ix=0; ix<Nx; ix++) 
1449     for (int iz=0; iz<Nz; iz++) {
1450       intptr_t doffset = ix*Ny*Nz + iz;
1451       intptr_t coffset = ix*Ny*Nz + iz;
1452       find_coefs_1d_d (spline->y_grid, spline->yBC, spline->coefs+doffset, Nz, 
1453                spline->coefs+coffset, Nz);
1454     }
1455 
1456   // Now, solve in the Z-direction
1457   for (int ix=0; ix<Nx; ix++) 
1458     for (int iy=0; iy<Ny; iy++) {
1459       intptr_t doffset = (ix*Ny+iy)*Nz;
1460       intptr_t coffset = (ix*Ny+iy)*Nz;
1461       find_coefs_1d_d (spline->z_grid, spline->zBC, spline->coefs+doffset, 1, 
1462                spline->coefs+coffset, 1);
1463     }
1464 }
1465 
1466 
1467 ////////////////////////////////////////////////////////////
1468 ////////////////////////////////////////////////////////////
1469 ////    Double-Precision, Complex Creation Routines     ////
1470 ////////////////////////////////////////////////////////////
1471 ////////////////////////////////////////////////////////////
1472 
1473 // On input, bands should be filled with:
1474 // row 0   :  abcdInitial from boundary conditions
1475 // rows 1:M:  basis functions in first 3 cols, data in last
1476 // row M+1 :  abcdFinal   from boundary conditions
1477 // cstride gives the stride between values in coefs.
1478 // On exit, coefs with contain interpolating B-spline coefs
1479 
1480 
1481 UBspline_1d_z*
1482 create_UBspline_1d_z (Ugrid x_grid, BCtype_z xBC, complex_double *data)
1483 {
1484   // Create new spline
1485   UBspline_1d_z* restrict spline = static_cast<UBspline_1d_z*>(malloc(sizeof(UBspline_1d_z)));
1486   spline->spcode = U1D;
1487   spline->tcode  = DOUBLE_COMPLEX;
1488   spline->xBC = xBC; 
1489 
1490   // Setup internal variables
1491   int M = x_grid.num;
1492   int N;
1493 
1494   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
1495     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num);
1496     N = M+3;
1497   }
1498   else {
1499     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
1500     N = M+2;
1501   }
1502 
1503   x_grid.delta_inv = 1.0/x_grid.delta;
1504   spline->x_grid   = x_grid;
1505 #ifndef HAVE_SSE2
1506   spline->coefs = (complex_double*)malloc (2*sizeof(double)*N);
1507 #else
1508   posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*N);
1509 #endif
1510 
1511   BCtype_d xBC_r, xBC_i;
1512   xBC_r.lCode = xBC.lCode;  xBC_r.rCode = xBC.rCode;
1513   xBC_r.lVal  = xBC.lVal_r; xBC_r.rVal  = xBC.rVal_r;
1514   xBC_i.lCode = xBC.lCode;  xBC_i.rCode = xBC.rCode;
1515   xBC_i.lVal  = xBC.lVal_i; xBC_i.rVal  = xBC.rVal_i;
1516   // Real part
1517   find_coefs_1d_d (spline->x_grid, xBC_r, (double*)data, 2, 
1518            (double*)spline->coefs, 2);
1519   // Imaginarty part
1520   find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+1, 2, 
1521            ((double*)spline->coefs)+1, 2);
1522  
1523   init_sse_data();   
1524   return spline;
1525 }
1526 
1527 void
1528 recompute_UBspline_1d_z (UBspline_1d_z* spline, complex_double *data)
1529 {
1530 //  int M = spline->x_grid.num;
1531 //  int N;
1532 
1533 //  if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
1534 //    N = M+3;
1535 //  else
1536 //    N = M+2;
1537 
1538   BCtype_d xBC_r, xBC_i;
1539   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
1540   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
1541   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
1542   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
1543   // Real part
1544   find_coefs_1d_d (spline->x_grid, xBC_r, (double*)data, 2, 
1545            (double*)spline->coefs, 2);
1546   // Imaginarty part
1547   find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+1, 2, 
1548            ((double*)spline->coefs)+1, 2);
1549  
1550 }
1551 
1552 
1553 UBspline_2d_z*
1554 create_UBspline_2d_z (Ugrid x_grid, Ugrid y_grid,
1555               BCtype_z xBC, BCtype_z yBC, complex_double *data)
1556 {
1557   // Create new spline
1558   UBspline_2d_z* restrict spline = static_cast<UBspline_2d_z*>(malloc(sizeof(UBspline_2d_z)));
1559   spline->spcode = U2D;
1560   spline->tcode  = DOUBLE_COMPLEX;
1561   spline->xBC = xBC; 
1562   spline->yBC = yBC; 
1563 
1564   // Setup internal variables
1565   int Mx = x_grid.num;
1566   int My = y_grid.num;
1567   int Nx, Ny;
1568 
1569   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     
1570     Nx = Mx+3;
1571   else                           
1572     Nx = Mx+2;
1573   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
1574   x_grid.delta_inv = 1.0/x_grid.delta;
1575   spline->x_grid   = x_grid;
1576 
1577   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     
1578     Ny = My+3;
1579   else                           
1580     Ny = My+2;
1581   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
1582   y_grid.delta_inv = 1.0/y_grid.delta;
1583   spline->y_grid   = y_grid;
1584   spline->x_stride = Ny;
1585 
1586 #ifndef HAVE_SSE2
1587   spline->coefs = (complex_double*)malloc (2*sizeof(double)*Nx*Ny);
1588 #else
1589   posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*Nx*Ny);
1590 #endif
1591 
1592   BCtype_d xBC_r, xBC_i, yBC_r, yBC_i;
1593   xBC_r.lCode = xBC.lCode;  xBC_r.rCode = xBC.rCode;
1594   xBC_r.lVal  = xBC.lVal_r; xBC_r.rVal  = xBC.rVal_r;
1595   xBC_i.lCode = xBC.lCode;  xBC_i.rCode = xBC.rCode;
1596   xBC_i.lVal  = xBC.lVal_i; xBC_i.rVal  = xBC.rVal_i;
1597   yBC_r.lCode = yBC.lCode;  yBC_r.rCode = yBC.rCode;
1598   yBC_r.lVal  = yBC.lVal_r; yBC_r.rVal  = yBC.rVal_r;
1599   yBC_i.lCode = yBC.lCode;  yBC_i.rCode = yBC.rCode;
1600   yBC_i.lVal  = yBC.lVal_i; yBC_i.rVal  = yBC.rVal_i;
1601   // First, solve in the X-direction 
1602   for (int iy=0; iy<My; iy++) {
1603     intptr_t doffset = 2*iy;
1604     intptr_t coffset = 2*iy;
1605     // Real part
1606     find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data+doffset), 2*My,
1607              (double*)spline->coefs+coffset, 2*Ny);
1608     // Imag part
1609     find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My,
1610              ((double*)spline->coefs)+coffset+1, 2*Ny);
1611   }
1612   
1613   // Now, solve in the Y-direction
1614   for (int ix=0; ix<Nx; ix++) {
1615     intptr_t doffset = 2*ix*Ny;
1616     intptr_t coffset = 2*ix*Ny;
1617     // Real part
1618     find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2, 
1619              (double*)spline->coefs+coffset, 2);
1620     // Imag part
1621     find_coefs_1d_d (spline->y_grid, yBC_i, (double*)spline->coefs+doffset+1, 2, 
1622              ((double*)spline->coefs)+coffset+1, 2);
1623   }
1624 
1625   init_sse_data();
1626   return spline;
1627 }
1628 
1629 
1630 void
1631 recompute_UBspline_2d_z (UBspline_2d_z* spline, complex_double *data)
1632 {
1633   int Mx = spline->x_grid.num;
1634   int My = spline->y_grid.num;
1635   int Nx, Ny;
1636 
1637   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     
1638     Nx = Mx+3;
1639   else                           
1640     Nx = Mx+2;
1641   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     
1642     Ny = My+3;
1643   else                           
1644     Ny = My+2;
1645 
1646   BCtype_d xBC_r, xBC_i, yBC_r, yBC_i;
1647   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
1648   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
1649   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
1650   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
1651   yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
1652   yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
1653   yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
1654   yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
1655 
1656   // First, solve in the X-direction 
1657   for (int iy=0; iy<My; iy++) {
1658     intptr_t doffset = 2*iy;
1659     intptr_t coffset = 2*iy;
1660     // Real part
1661     find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data+doffset), 2*My,
1662              (double*)spline->coefs+coffset, 2*Ny);
1663     // Imag part
1664     find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My,
1665              ((double*)spline->coefs)+coffset+1, 2*Ny);
1666   }
1667   
1668   // Now, solve in the Y-direction
1669   for (int ix=0; ix<Nx; ix++) {
1670     intptr_t doffset = 2*ix*Ny;
1671     intptr_t coffset = 2*ix*Ny;
1672     // Real part
1673     find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2, 
1674              (double*)spline->coefs+coffset, 2);
1675     // Imag part
1676     find_coefs_1d_d (spline->y_grid, yBC_i, (double*)spline->coefs+doffset+1, 2, 
1677              ((double*)spline->coefs)+coffset+1, 2);
1678   }
1679 }
1680 
1681 
1682 
1683 UBspline_3d_z*
1684 create_UBspline_3d_z (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
1685               BCtype_z xBC, BCtype_z yBC, BCtype_z zBC,
1686               complex_double *data)
1687 {
1688   // Create new spline
1689   UBspline_3d_z* restrict spline = static_cast<UBspline_3d_z*>(malloc(sizeof(UBspline_3d_z)));
1690   spline->spcode = U3D;
1691   spline->tcode  = DOUBLE_COMPLEX;
1692   spline->xBC = xBC; 
1693   spline->yBC = yBC; 
1694   spline->zBC = zBC;
1695 
1696   // Setup internal variables
1697   int Mx = x_grid.num;  int My = y_grid.num; int Mz = z_grid.num;
1698   int Nx, Ny, Nz;
1699 
1700   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
1701   else                           Nx = Mx+2;
1702   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
1703   x_grid.delta_inv = 1.0/x_grid.delta;
1704   spline->x_grid   = x_grid;
1705 
1706   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     Ny = My+3;
1707   else                           Ny = My+2;
1708   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
1709   y_grid.delta_inv = 1.0/y_grid.delta;
1710   spline->y_grid   = y_grid;
1711 
1712   if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC)     Nz = Mz+3;
1713   else                           Nz = Mz+2;
1714   z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
1715   z_grid.delta_inv = 1.0/z_grid.delta;
1716   spline->z_grid   = z_grid;
1717 
1718   spline->x_stride = Ny*Nz;
1719   spline->y_stride = Nz;
1720 
1721 #ifndef HAVE_SSE2
1722   spline->coefs      = new complex_double[Nx*Ny*Nz];
1723 #else
1724   posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*Nx*Ny*Nz);
1725 #endif
1726 
1727   BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
1728   xBC_r.lCode = xBC.lCode;  xBC_r.rCode = xBC.rCode;
1729   xBC_r.lVal  = xBC.lVal_r; xBC_r.rVal  = xBC.rVal_r;
1730   xBC_i.lCode = xBC.lCode;  xBC_i.rCode = xBC.rCode;
1731   xBC_i.lVal  = xBC.lVal_i; xBC_i.rVal  = xBC.rVal_i;
1732   yBC_r.lCode = yBC.lCode;  yBC_r.rCode = yBC.rCode;
1733   yBC_r.lVal  = yBC.lVal_r; yBC_r.rVal  = yBC.rVal_r;
1734   yBC_i.lCode = yBC.lCode;  yBC_i.rCode = yBC.rCode;
1735   yBC_i.lVal  = yBC.lVal_i; yBC_i.rVal  = yBC.rVal_i;
1736   zBC_r.lCode = zBC.lCode;  zBC_r.rCode = zBC.rCode;
1737   zBC_r.lVal  = zBC.lVal_r; zBC_r.rVal  = zBC.rVal_r;
1738   zBC_i.lCode = zBC.lCode;  zBC_i.rCode = zBC.rCode;
1739   zBC_i.lVal  = zBC.lVal_i; zBC_i.rVal  = zBC.rVal_i;
1740   // First, solve in the X-direction 
1741   for (int iy=0; iy<My; iy++) 
1742     for (int iz=0; iz<Mz; iz++) {
1743       intptr_t doffset = 2*(iy*Mz+iz);
1744       intptr_t coffset = 2*(iy*Nz+iz);
1745       // Real part
1746       find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data)+doffset, 2*My*Mz,
1747                ((double*)spline->coefs)+coffset, 2*Ny*Nz);
1748       // Imag part
1749       find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My*Mz,
1750                ((double*)spline->coefs)+coffset+1, 2*Ny*Nz);
1751     }
1752   
1753   // Now, solve in the Y-direction
1754   for (int ix=0; ix<Nx; ix++) 
1755     for (int iz=0; iz<Nz; iz++) {
1756       intptr_t doffset = 2*(ix*Ny*Nz + iz);
1757       intptr_t coffset = 2*(ix*Ny*Nz + iz);
1758       // Real part
1759       find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2*Nz, 
1760                ((double*)spline->coefs)+coffset, 2*Nz);
1761       // Imag part
1762       find_coefs_1d_d (spline->y_grid, yBC_i, ((double*)spline->coefs)+doffset+1, 2*Nz, 
1763                ((double*)spline->coefs)+coffset+1, 2*Nz);
1764     }
1765 
1766   // Now, solve in the Z-direction
1767   for (int ix=0; ix<Nx; ix++) 
1768     for (int iy=0; iy<Ny; iy++) {
1769       intptr_t doffset = 2*((ix*Ny+iy)*Nz);
1770       intptr_t coffset = 2*((ix*Ny+iy)*Nz);
1771       // Real part
1772       find_coefs_1d_d (spline->z_grid, zBC_r, ((double*)spline->coefs)+doffset, 2, 
1773                ((double*)spline->coefs)+coffset, 2);
1774       // Imag part
1775       find_coefs_1d_d (spline->z_grid, zBC_i, ((double*)spline->coefs)+doffset+1, 2, 
1776                ((double*)spline->coefs)+coffset+1, 2);
1777     }
1778   init_sse_data();
1779   return spline;
1780 }
1781 
1782 void
1783 recompute_UBspline_3d_z (UBspline_3d_z* spline, complex_double *data)
1784 {
1785   // Setup internal variables
1786   int Mx = spline->x_grid.num;  
1787   int My = spline->y_grid.num; 
1788   int Mz = spline->z_grid.num;
1789   int Nx, Ny, Nz;
1790 
1791   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     
1792     Nx = Mx+3;
1793   else                                                                
1794     Nx = Mx+2;
1795   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     
1796     Ny = My+3;
1797   else                                                                
1798     Ny = My+2;
1799   if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC)     
1800     Nz = Mz+3;
1801   else                                                                
1802     Nz = Mz+2;
1803 
1804   BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
1805   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
1806   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
1807   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
1808   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
1809   yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
1810   yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
1811   yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
1812   yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
1813   zBC_r.lCode = spline->zBC.lCode;  zBC_r.rCode = spline->zBC.rCode;
1814   zBC_r.lVal  = spline->zBC.lVal_r; zBC_r.rVal  = spline->zBC.rVal_r;
1815   zBC_i.lCode = spline->zBC.lCode;  zBC_i.rCode = spline->zBC.rCode;
1816   zBC_i.lVal  = spline->zBC.lVal_i; zBC_i.rVal  = spline->zBC.rVal_i;
1817   // First, solve in the X-direction 
1818   for (int iy=0; iy<My; iy++) 
1819     for (int iz=0; iz<Mz; iz++) {
1820       intptr_t doffset = 2*(iy*Mz+iz);
1821       intptr_t coffset = 2*(iy*Nz+iz);
1822       // Real part
1823       find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data)+doffset, 2*My*Mz,
1824                ((double*)spline->coefs)+coffset, 2*Ny*Nz);
1825       // Imag part
1826       find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My*Mz,
1827                ((double*)spline->coefs)+coffset+1, 2*Ny*Nz);
1828     }
1829   
1830   // Now, solve in the Y-direction
1831   for (int ix=0; ix<Nx; ix++) 
1832     for (int iz=0; iz<Nz; iz++) {
1833       intptr_t doffset = 2*(ix*Ny*Nz + iz);
1834       intptr_t coffset = 2*(ix*Ny*Nz + iz);
1835       // Real part
1836       find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2*Nz, 
1837                ((double*)spline->coefs)+coffset, 2*Nz);
1838       // Imag part
1839       find_coefs_1d_d (spline->y_grid, yBC_i, ((double*)spline->coefs)+doffset+1, 2*Nz, 
1840                ((double*)spline->coefs)+coffset+1, 2*Nz);
1841     }
1842 
1843   // Now, solve in the Z-direction
1844   for (int ix=0; ix<Nx; ix++) 
1845     for (int iy=0; iy<Ny; iy++) {
1846       intptr_t doffset = 2*((ix*Ny+iy)*Nz);
1847       intptr_t coffset = 2*((ix*Ny+iy)*Nz);
1848       // Real part
1849       find_coefs_1d_d (spline->z_grid, zBC_r, ((double*)spline->coefs)+doffset, 2, 
1850                ((double*)spline->coefs)+coffset, 2);
1851       // Imag part
1852       find_coefs_1d_d (spline->z_grid, zBC_i, ((double*)spline->coefs)+doffset+1, 2, 
1853                ((double*)spline->coefs)+coffset+1, 2);
1854     }
1855 }
1856 
1857 
1858 void
1859 destroy_UBspline (Bspline *spline)
1860 {
1861   free(spline->coefs);
1862   free(spline);
1863 }
1864 
1865 void 
1866 destroy_NUBspline (Bspline *spline);
1867 
1868 void
1869 destroy_multi_UBspline (Bspline *spline);
1870 
1871 void
1872 destroy_Bspline (void *spline)
1873 {
1874   Bspline *sp = (Bspline *)spline;
1875   if (sp->sp_code <= U3D) 
1876     destroy_UBspline (sp);
1877   else if (sp->sp_code <= NU3D) 
1878     destroy_NUBspline (sp);
1879   else if (sp->sp_code <= MULTI_U3D)
1880     destroy_multi_UBspline (sp);
1881   else
1882     fprintf (stderr, "Error in destroy_Bspline:  invalid spline code %d.\n",
1883          sp->sp_code);
1884 }