File indexing completed on 2024-12-22 04:09:53

0001 /////////////////////////////////////////////////////////////////////////////
0002 //  einspline:  a library for creating and evaluating B-splines            //
0003 //  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
0004 //                                                                         //
0005 //  This program is free software; you can redistribute it and/or modify   //
0006 //  it under the terms of the GNU General Public License as published by   //
0007 //  the Free Software Foundation; either version 2 of the License, or      //
0008 //  (at your option) any later version.                                    //
0009 //                                                                         //
0010 //  This program is distributed in the hope that it will be useful,        //
0011 //  but WITHOUT ANY WARRANTY; without even the implied warranty of         //
0012 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
0013 //  GNU General Public License for more details.                           //
0014 //                                                                         //
0015 //  You should have received a copy of the GNU General Public License      //
0016 //  along with this program; if not, write to the Free Software            //
0017 //  Foundation, Inc., 51 Franklin Street, Fifth Floor,                     //
0018 //  Boston, MA  02110-1301  USA                                            //
0019 /////////////////////////////////////////////////////////////////////////////
0020 
0021 #include "nubspline_create.h"
0022 #include <math.h>
0023 #include <assert.h>
0024 #ifndef _XOPEN_SOURCE
0025   #define _XOPEN_SOURCE 600
0026 #endif
0027 #ifndef __USE_XOPEN2K
0028   #define __USE_XOPEN2K
0029 #endif
0030 #include <stdlib.h>
0031 #include <stdio.h>
0032 
0033 ////////////////////////////////////////////////////////
0034 // Notes on conventions:                              //
0035 // Below, M (and Mx, My, Mz) represent the number of  //
0036 // data points to be interpolated.  With derivative   //
0037 // boundary conditions, it is equal to the number of  //
0038 // grid points.  With periodic boundary conditions,   //
0039 // it is one less than the number of grid points.     //
0040 // N (and Nx, Ny, Nz) is the number of B-spline       //
0041 // coefficients, which is #(grid points)+2 for all    //
0042 // boundary conditions.                               //
0043 ////////////////////////////////////////////////////////
0044 
0045 
0046 ////////////////////////////////////////////////////////
0047 ////////////////////////////////////////////////////////
0048 //// Single-precision real creation routines        ////
0049 ////////////////////////////////////////////////////////
0050 ////////////////////////////////////////////////////////
0051 void
0052 solve_NUB_deriv_interp_1d_s (NUBasis* restrict basis, 
0053                  float* restrict data, int datastride,
0054                  float* restrict    p, int pstride,
0055                  float abcdInitial[4], float abcdFinal[4])
0056 {
0057   int M = basis->grid->num_points;
0058   int N = M+2;
0059   // Banded matrix storage.  The first three elements in the
0060   // tinyvector store the tridiagonal coefficients.  The last element
0061   // stores the RHS data.
0062 #ifdef HAVE_C_VARARRAYS
0063   float bands[4*N];
0064 #else
0065   float *bands = new float[4*N];
0066 #endif
0067   
0068 
0069   // Fill up bands
0070   for (int i=0; i<4; i++) {
0071     bands[i]         = abcdInitial[i];
0072     bands[4*(N-1)+i] = abcdFinal[i];
0073   }
0074   for (int i=0; i<M; i++) {
0075     get_NUBasis_funcs_si (basis, i, &(bands[4*(i+1)]));
0076     bands[4*(i+1)+3] = data[datastride*i];
0077   }
0078     
0079   // Now solve:
0080   // First and last rows are different
0081   bands[4*0+1] /= bands[4*0+0];
0082   bands[4*0+2] /= bands[4*0+0];
0083   bands[4*0+3] /= bands[4*0+0];
0084   bands[4*0+0] = 1.0;
0085   bands[4*1+1] -= bands[4*1+0]*bands[4*0+1];
0086   bands[4*1+2] -= bands[4*1+0]*bands[4*0+2];
0087   bands[4*1+3] -= bands[4*1+0]*bands[4*0+3];
0088   bands[4*0+0] = 0.0;
0089   bands[4*1+2] /= bands[4*1+1];
0090   bands[4*1+3] /= bands[4*1+1];
0091   bands[4*1+1] = 1.0;
0092   
0093   // Now do rows 2 through M+1
0094   for (int row=2; row < N-1; row++) {
0095     bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2];
0096     bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3];
0097     bands[4*(row)+2] /= bands[4*(row)+1];
0098     bands[4*(row)+3] /= bands[4*(row)+1];
0099     bands[4*(row)+0] = 0.0;
0100     bands[4*(row)+1] = 1.0;
0101   }
0102 
0103   // Do last row
0104   bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2];
0105   bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3];
0106   bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2];
0107   bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3];
0108   bands[4*(M+1)+3] /= bands[4*(M+1)+2];
0109   bands[4*(M+1)+2] = 1.0;
0110 
0111   p[pstride*(M+1)] = bands[4*(M+1)+3];
0112   // Now back substitute up
0113   for (int row=M; row>0; row--)
0114     p[pstride*(row)] = bands[4*(row)+3] - bands[4*(row)+2]*p[pstride*(row+1)];
0115   
0116   // Finish with first row
0117   p[0] = bands[4*(0)+3] - bands[4*(0)+1]*p[pstride*1] - bands[4*(0)+2]*p[pstride*2];
0118 
0119 #ifndef HAVE_C_VARARRAYS
0120   delete[] bands;
0121 #endif
0122 }
0123 
0124 
0125 
0126 // The number of elements in data should be one less than the number
0127 // of grid points 
0128 void
0129 solve_NUB_periodic_interp_1d_s (NUBasis* restrict basis,
0130                 float* restrict data, int datastride,
0131                 float* restrict p, int pstride)
0132 {
0133   int M = basis->grid->num_points-1;
0134 
0135   // Banded matrix storage.  The first three elements in each row
0136   // store the tridiagonal coefficients.  The last element
0137   // stores the RHS data.
0138 #ifdef HAVE_C_VARARRAYS
0139   float bands[4*M], lastCol[M];
0140 #else
0141   float *bands   = new float[4*M];
0142   float *lastCol = new float[  M];
0143 #endif
0144 
0145   // Fill up bands
0146   for (int i=0; i<M; i++) {
0147     get_NUBasis_funcs_si (basis, i, &(bands[4*i])); 
0148     bands[4*(i)+3] = data[i*datastride];
0149   }
0150     
0151   // Now solve:
0152   // First and last rows are different
0153   bands[4*(0)+2] /= bands[4*(0)+1];
0154   bands[4*(0)+0] /= bands[4*(0)+1];
0155   bands[4*(0)+3] /= bands[4*(0)+1];
0156   bands[4*(0)+1]  = 1.0;
0157   bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
0158   bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
0159   bands[4*(M-1)+2]  = -bands[4*(M-1)+2]*bands[4*(0)+2];
0160   lastCol[0] = bands[4*(0)+0];
0161   
0162   for (int row=1; row < (M-1); row++) {
0163     bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
0164     bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
0165     lastCol[row]   = -bands[4*(row)+0] * lastCol[row-1];
0166     bands[4*(row)+0] = 0.0;
0167     bands[4*(row)+2] /= bands[4*(row)+1];
0168     bands[4*(row)+3] /= bands[4*(row)+1];
0169     lastCol[row]  /= bands[4*(row)+1];
0170     bands[4*(row)+1]  = 1.0;
0171     if (row < (M-2)) {
0172       bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
0173       bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
0174       bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
0175     }
0176   }
0177   
0178   // Now do last row
0179   // The [2] element and [0] element are now on top of each other 
0180   bands[4*(M-1)+0] += bands[4*(M-1)+2];
0181   bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
0182   bands[4*(M-1)+3] -= bands[4*(M-1)+0] *  bands[4*(M-2)+3];
0183   bands[4*(M-1)+3] /= bands[4*(M-1)+1];
0184   p[pstride*M] = bands[4*(M-1)+3];
0185   for (int row=M-2; row>=0; row--) 
0186     p[pstride*(row+1)] = bands[4*(row)+3] - 
0187       bands[4*(row)+2]*p[pstride*(row+2)] - lastCol[row]*p[pstride*M];
0188   
0189   p[pstride*  0  ] = p[pstride*M];
0190   p[pstride*(M+1)] = p[pstride*1];
0191   p[pstride*(M+2)] = p[pstride*2];
0192 #ifndef HAVE_C_VARARRAYS
0193   delete[] bands;
0194   delete[] lastCol;
0195 #endif
0196 }
0197 
0198 
0199 
0200 void
0201 find_NUBcoefs_1d_s (NUBasis* restrict basis, BCtype_s bc,
0202             float *data,  int dstride,
0203             float *coefs, int cstride)
0204 {
0205   if (bc.lCode == PERIODIC) 
0206     solve_NUB_periodic_interp_1d_s (basis, data, dstride, coefs, cstride);
0207   else {
0208     int M = basis->grid->num_points;
0209     // Setup boundary conditions
0210     float bfuncs[4] = {};
0211     float dbfuncs[4] = {};
0212     float abcd_left[4] = {};
0213     float abcd_right[4] = {};
0214     // Left boundary
0215     if (bc.lCode == FLAT || bc.lCode == NATURAL)
0216       bc.lVal = 0.0;
0217     if (bc.lCode == FLAT || bc.lCode == DERIV1) {
0218       get_NUBasis_dfuncs_si (basis, 0, bfuncs, abcd_left);
0219       abcd_left[3] = bc.lVal;
0220     }
0221     if (bc.lCode == NATURAL || bc.lCode == DERIV2) {
0222       get_NUBasis_d2funcs_si (basis, 0, bfuncs, dbfuncs, abcd_left);
0223       abcd_left[3] = bc.lVal;
0224     }
0225     
0226     // Right boundary
0227     if (bc.rCode == FLAT || bc.rCode == NATURAL)
0228       bc.rVal = 0.0;
0229     if (bc.rCode == FLAT || bc.rCode == DERIV1) {
0230       get_NUBasis_dfuncs_si (basis, M-1, bfuncs, abcd_right);
0231       abcd_right[3] = bc.rVal;
0232     }
0233     if (bc.rCode == NATURAL || bc.rCode == DERIV2) {
0234       get_NUBasis_d2funcs_si (basis, M-1, bfuncs, dbfuncs, abcd_right);
0235       abcd_right[3] = bc.rVal;
0236     }
0237     // Now, solve for coefficients
0238     solve_NUB_deriv_interp_1d_s (basis, data, dstride, coefs, cstride,
0239                  abcd_left, abcd_right);
0240   }
0241 }
0242 
0243 
0244 
0245 
0246 NUBspline_1d_s *
0247 create_NUBspline_1d_s (NUgrid* x_grid, BCtype_s xBC, float *data)
0248 {
0249   // First, create the spline structure
0250   NUBspline_1d_s* spline = static_cast<NUBspline_1d_s*>(malloc(sizeof(NUBspline_1d_s)));
0251   if (spline == NULL)
0252     return spline;
0253   spline->sp_code = NU1D;
0254   spline->t_code  = SINGLE_REAL;
0255 
0256   // Next, create the basis
0257   spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
0258   // M is the number of data points (but is unused)
0259 //  int M;
0260 //  if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1;
0261 //  else                       M = x_grid->num_points;
0262   int N = x_grid->num_points + 2;
0263 
0264   // Allocate coefficients and solve  
0265   spline->coefs = (float*)malloc (sizeof(float)*N);
0266   find_NUBcoefs_1d_s (spline->x_basis, xBC, data, 1, spline->coefs, 1);
0267     
0268   return spline;
0269 }
0270 
0271 NUBspline_2d_s *
0272 create_NUBspline_2d_s (NUgrid* x_grid, NUgrid* y_grid,
0273                BCtype_s xBC, BCtype_s yBC, float *data)
0274 {
0275   // First, create the spline structure
0276   NUBspline_2d_s* spline = static_cast<NUBspline_2d_s*>(malloc(sizeof(NUBspline_2d_s)));
0277   if (spline == NULL)
0278     return spline;
0279   spline->sp_code = NU2D;
0280   spline->t_code  = SINGLE_REAL;
0281 
0282   // Next, create the bases
0283   spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
0284   spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
0285   // set but unused
0286   //int Mx,
0287   int My, Nx, Ny;
0288 //  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
0289 //  else                       Mx = x_grid->num_points;
0290   if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
0291   else                       My = y_grid->num_points;
0292 
0293   Nx = x_grid->num_points + 2;
0294   Ny = y_grid->num_points + 2;
0295     
0296   spline->x_stride = Ny;
0297 #ifndef HAVE_SSE2
0298   spline->coefs = (float*)malloc (sizeof(float)*Nx*Ny);
0299 #else
0300   posix_memalign ((void**)&spline->coefs, 16, sizeof(float)*Nx*Ny);
0301 #endif
0302 
0303   // First, solve in the X-direction 
0304   for (int iy=0; iy<My; iy++) {
0305     int doffset = iy;
0306     int coffset = iy;
0307     find_NUBcoefs_1d_s (spline->x_basis, xBC, data+doffset, My,
0308             spline->coefs+coffset, Ny);
0309   }
0310   
0311   // Now, solve in the Y-direction
0312   for (int ix=0; ix<Nx; ix++) {
0313     int doffset = ix*Ny;
0314     int coffset = ix*Ny;
0315     find_NUBcoefs_1d_s (spline->y_basis, yBC, spline->coefs+doffset, 1, 
0316             spline->coefs+coffset, 1);
0317   }
0318     
0319   return spline;
0320 }
0321 
0322 
0323 NUBspline_3d_s *
0324 create_NUBspline_3d_s (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
0325                BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, float *data)
0326 {
0327   // First, create the spline structure
0328   NUBspline_3d_s* spline = static_cast<NUBspline_3d_s*>(malloc(sizeof(NUBspline_3d_s)));
0329   if (spline == NULL)
0330     return spline;
0331   spline->sp_code = NU3D;
0332   spline->t_code  = SINGLE_REAL;
0333 
0334   // Next, create the bases
0335   spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
0336   spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
0337   spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC);
0338   // set but unused
0339   // int Mx,
0340   int My, Mz, Nx, Ny, Nz;
0341 //  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
0342 //  else                       Mx = x_grid->num_points;
0343   if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
0344   else                       My = y_grid->num_points;
0345   if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1;
0346   else                       Mz = z_grid->num_points;
0347 
0348   Nx = x_grid->num_points + 2;
0349   Ny = y_grid->num_points + 2;
0350   Nz = z_grid->num_points + 2;
0351 
0352   // Allocate coefficients and solve  
0353   spline->x_stride = Ny*Nz;
0354   spline->y_stride = Nz;
0355 #ifndef HAVE_SSE2
0356   spline->coefs = (float*)malloc (sizeof(float)*Nx*Ny*Nz);
0357 #else
0358   posix_memalign ((void**)&spline->coefs, 16, sizeof(float)*Nx*Ny*Nz);
0359 #endif
0360 
0361   // First, solve in the X-direction 
0362   for (int iy=0; iy<My; iy++) 
0363     for (int iz=0; iz<Mz; iz++) {
0364       int doffset = iy*Mz+iz;
0365       int coffset = iy*Nz+iz;
0366       find_NUBcoefs_1d_s (spline->x_basis, xBC, data+doffset, My*Mz,
0367               spline->coefs+coffset, Ny*Nz);
0368     }
0369   
0370   // Now, solve in the Y-direction
0371   for (int ix=0; ix<Nx; ix++) 
0372     for (int iz=0; iz<Nz; iz++) {
0373       int doffset = ix*Ny*Nz + iz;
0374       int coffset = ix*Ny*Nz + iz;
0375       find_NUBcoefs_1d_s (spline->y_basis, yBC, spline->coefs+doffset, Nz, 
0376               spline->coefs+coffset, Nz);
0377     }
0378 
0379   // Now, solve in the Z-direction
0380   for (int ix=0; ix<Nx; ix++) 
0381     for (int iy=0; iy<Ny; iy++) {
0382       int doffset = (ix*Ny+iy)*Nz;
0383       int coffset = (ix*Ny+iy)*Nz;
0384       find_NUBcoefs_1d_s (spline->z_basis, zBC, spline->coefs+doffset, 1, 
0385               spline->coefs+coffset, 1);
0386     }
0387   return spline;
0388 }
0389 
0390 ////////////////////////////////////////////////////////
0391 ////////////////////////////////////////////////////////
0392 //// Double-precision real creation routines        ////
0393 ////////////////////////////////////////////////////////
0394 ////////////////////////////////////////////////////////
0395 void
0396 solve_NUB_deriv_interp_1d_d (NUBasis* restrict basis, 
0397                  double* restrict data, int datastride,
0398                  double* restrict    p, int pstride,
0399                  double abcdInitial[4], double abcdFinal[4])
0400 {
0401   int M = basis->grid->num_points;
0402   int N = M+2;
0403   // Banded matrix storage.  The first three elements in the
0404   // tinyvector store the tridiagonal coefficients.  The last element
0405   // stores the RHS data.
0406 #ifdef HAVE_C_VARARRAYS
0407   double bands[4*N];
0408 #else
0409   double *bands = new double[4*N];
0410 #endif
0411 
0412   // Fill up bands
0413   for (int i=0; i<4; i++) {
0414     bands[i]         = abcdInitial[i];
0415     bands[4*(N-1)+i] = abcdFinal[i];
0416   }
0417   for (int i=0; i<M; i++) {
0418     get_NUBasis_funcs_di (basis, i, &(bands[4*(i+1)]));
0419     bands[4*(i+1)+3] = data[datastride*i];
0420   }
0421   /* for (int i=0; i<4*N; i++)
0422      if (isnan(bands[i]))
0423      fprintf(stderr, "NAN at bands[%d].\n", i); */
0424     
0425   // Now solve:
0426   // First and last rows are different
0427   bands[4*0+1] /= bands[4*0+0];
0428   bands[4*0+2] /= bands[4*0+0];
0429   bands[4*0+3] /= bands[4*0+0];
0430   bands[4*0+0] = 1.0;
0431   bands[4*1+1] -= bands[4*1+0]*bands[4*0+1];
0432   bands[4*1+2] -= bands[4*1+0]*bands[4*0+2];
0433   bands[4*1+3] -= bands[4*1+0]*bands[4*0+3];
0434   bands[4*0+0] = 0.0;
0435   bands[4*1+2] /= bands[4*1+1];
0436   bands[4*1+3] /= bands[4*1+1];
0437   bands[4*1+1] = 1.0;
0438   
0439   // Now do rows 2 through M+1
0440   for (int row=2; row < N-1; row++) {
0441     bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2];
0442     bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3];
0443     bands[4*(row)+2] /= bands[4*(row)+1];
0444     bands[4*(row)+3] /= bands[4*(row)+1];
0445     bands[4*(row)+0] = 0.0;
0446     bands[4*(row)+1] = 1.0;
0447   }
0448 
0449   // Do last row
0450   bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2];
0451   bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3];
0452   bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2];
0453   bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3];
0454   bands[4*(M+1)+3] /= bands[4*(M+1)+2];
0455   bands[4*(M+1)+2] = 1.0;
0456 
0457   p[pstride*(M+1)] = bands[4*(M+1)+3];
0458   // Now back substitute up
0459   for (int row=M; row>0; row--)
0460     p[pstride*(row)] = bands[4*(row)+3] - bands[4*(row)+2]*p[pstride*(row+1)];
0461   
0462   // Finish with first row
0463   p[0] = bands[4*(0)+3] - bands[4*(0)+1]*p[pstride*1] - bands[4*(0)+2]*p[pstride*2];
0464 #ifndef HAVE_C_VARARRAYS
0465   delete[] bands;
0466 #endif
0467 }
0468 
0469 
0470 void
0471 solve_NUB_periodic_interp_1d_d (NUBasis* restrict basis,
0472                 double* restrict data, int datastride,
0473                 double* restrict p, int pstride)
0474 {
0475   int M = basis->grid->num_points-1;
0476 
0477   // Banded matrix storage.  The first three elements in the
0478   // tinyvector store the tridiagonal coefficients.  The last element
0479   // stores the RHS data.
0480 #ifdef HAVE_C_VARARRAYS
0481   double bands[4*M], lastCol[M];
0482 #else
0483   double *bands   = new double[4*M];
0484   double *lastCol = new double[  M];
0485 #endif
0486 
0487   // Fill up bands
0488   for (int i=0; i<M; i++) {
0489     get_NUBasis_funcs_di (basis, i, &(bands[4*i])); 
0490     bands[4*(i)+3] = data[i*datastride];
0491   }
0492     
0493   // Now solve:
0494   // First and last rows are different
0495   bands[4*(0)+2] /= bands[4*(0)+1];
0496   bands[4*(0)+0] /= bands[4*(0)+1];
0497   bands[4*(0)+3] /= bands[4*(0)+1];
0498   bands[4*(0)+1]  = 1.0;
0499   bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
0500   bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
0501   bands[4*(M-1)+2]  = -bands[4*(M-1)+2]*bands[4*(0)+2];
0502   lastCol[0] = bands[4*(0)+0];
0503   
0504   for (int row=1; row < (M-1); row++) {
0505     bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
0506     bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
0507     lastCol[row]   = -bands[4*(row)+0] * lastCol[row-1];
0508     bands[4*(row)+0] = 0.0;
0509     bands[4*(row)+2] /= bands[4*(row)+1];
0510     bands[4*(row)+3] /= bands[4*(row)+1];
0511     lastCol[row]  /= bands[4*(row)+1];
0512     bands[4*(row)+1]  = 1.0;
0513     if (row < (M-2)) {
0514       bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
0515       bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
0516       bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
0517     }
0518   }
0519   
0520   // Now do last row
0521   // The [2] element and [0] element are now on top of each other 
0522   bands[4*(M-1)+0] += bands[4*(M-1)+2];
0523   bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
0524   bands[4*(M-1)+3] -= bands[4*(M-1)+0] *  bands[4*(M-2)+3];
0525   bands[4*(M-1)+3] /= bands[4*(M-1)+1];
0526   p[pstride*M] = bands[4*(M-1)+3];
0527   for (int row=M-2; row>=0; row--) 
0528     p[pstride*(row+1)] = bands[4*(row)+3] - 
0529       bands[4*(row)+2]*p[pstride*(row+2)] - lastCol[row]*p[pstride*M];
0530   
0531   p[pstride*  0  ] = p[pstride*M];
0532   p[pstride*(M+1)] = p[pstride*1];
0533   p[pstride*(M+2)] = p[pstride*2];
0534 #ifndef HAVE_C_VARARRAYS
0535   delete[] bands;
0536   delete[] lastCol;
0537 #endif
0538 }
0539 
0540 
0541 
0542 void
0543 find_NUBcoefs_1d_d (NUBasis* restrict basis, BCtype_d bc,
0544             double *data,  int dstride,
0545             double *coefs, int cstride)
0546 {
0547   if (bc.lCode == PERIODIC) 
0548     solve_NUB_periodic_interp_1d_d (basis, data, dstride, coefs, cstride);
0549   else {
0550     int M = basis->grid->num_points;
0551     // Setup boundary conditions
0552     double bfuncs[4] {};
0553     double dbfuncs[4] {};
0554     double abcd_left[4] {};
0555     double abcd_right[4] {};
0556 
0557     // Left boundary
0558     if (bc.lCode == FLAT || bc.lCode == NATURAL)
0559       bc.lVal = 0.0;
0560     if (bc.lCode == FLAT || bc.lCode == DERIV1) {
0561       get_NUBasis_dfuncs_di (basis, 0, bfuncs, abcd_left);
0562       abcd_left[3] = bc.lVal;
0563     }
0564     if (bc.lCode == NATURAL || bc.lCode == DERIV2) {
0565       get_NUBasis_d2funcs_di (basis, 0, bfuncs, dbfuncs, abcd_left);
0566       abcd_left[3] = bc.lVal;
0567     }
0568     
0569     // Right boundary
0570     if (bc.rCode == FLAT || bc.rCode == NATURAL)
0571       bc.rVal = 0.0;
0572     if (bc.rCode == FLAT || bc.rCode == DERIV1) {
0573       get_NUBasis_dfuncs_di (basis, M-1, bfuncs, abcd_right);
0574       abcd_right[3] = bc.rVal;
0575     }
0576     if (bc.rCode == NATURAL || bc.rCode == DERIV2) {
0577       get_NUBasis_d2funcs_di (basis, M-1, bfuncs, dbfuncs, abcd_right);
0578       abcd_right[3] = bc.rVal;
0579     }
0580 
0581     // Now, solve for coefficients
0582     solve_NUB_deriv_interp_1d_d (basis, data, dstride, coefs, cstride,
0583                  abcd_left, abcd_right);
0584   }
0585 }
0586 
0587 
0588 
0589 
0590 NUBspline_1d_d *
0591 create_NUBspline_1d_d (NUgrid* x_grid, BCtype_d xBC, double *data)
0592 {
0593   // First, create the spline structure
0594   NUBspline_1d_d* spline = static_cast<NUBspline_1d_d*>(malloc(sizeof(NUBspline_1d_d)));
0595   if (spline == NULL)
0596     return spline;
0597   spline->sp_code = NU1D;
0598   spline->t_code  = DOUBLE_REAL;
0599 
0600   // Next, create the basis
0601   spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
0602   // M is the number of data points (set but unused)
0603 //  int M;
0604 //  if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1;
0605 //  else                       M = x_grid->num_points;
0606   int N = x_grid->num_points + 2;
0607 
0608   // Allocate coefficients and solve
0609   spline->coefs = (double*)malloc (sizeof(double)*N);
0610   find_NUBcoefs_1d_d (spline->x_basis, xBC, data, 1, spline->coefs, 1);
0611     
0612   return spline;
0613 }
0614 
0615 NUBspline_2d_d *
0616 create_NUBspline_2d_d (NUgrid* x_grid, NUgrid* y_grid,
0617                BCtype_d xBC, BCtype_d yBC, double *data)
0618 {
0619   // First, create the spline structure
0620   NUBspline_2d_d* spline = static_cast<NUBspline_2d_d*>(malloc(sizeof(NUBspline_2d_d)));
0621   if (spline == NULL)
0622     return spline;
0623   spline->sp_code = NU2D;
0624   spline->t_code  = DOUBLE_REAL;
0625 
0626   // Next, create the bases
0627   spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
0628   spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
0629 
0630   // int Mx, (set but unused)
0631   int My, Nx, Ny;
0632 //  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
0633 //  else                       Mx = x_grid->num_points;
0634   if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
0635   else                       My = y_grid->num_points;
0636 
0637   Nx = x_grid->num_points + 2;
0638   Ny = y_grid->num_points + 2;
0639   
0640   spline->x_stride = Ny;
0641 #ifndef HAVE_SSE2
0642   spline->coefs = (double*)malloc (sizeof(double)*Nx*Ny);
0643 #else
0644   posix_memalign ((void**)&spline->coefs, 16, sizeof(double)*Nx*Ny);
0645 #endif
0646 
0647   // First, solve in the X-direction 
0648   for (int iy=0; iy<My; iy++) {
0649     int doffset = iy;
0650     int coffset = iy;
0651     find_NUBcoefs_1d_d (spline->x_basis, xBC, data+doffset, My,
0652             spline->coefs+coffset, Ny);
0653   }
0654   
0655   // Now, solve in the Y-direction
0656   for (int ix=0; ix<Nx; ix++) {
0657     int doffset = ix*Ny;
0658     int coffset = ix*Ny;
0659     find_NUBcoefs_1d_d (spline->y_basis, yBC, spline->coefs+doffset, 1, 
0660             spline->coefs+coffset, 1);
0661   }
0662     
0663   return spline;
0664 }
0665 
0666 
0667 NUBspline_3d_d *
0668 create_NUBspline_3d_d (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
0669                BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, double *data)
0670 {
0671   // First, create the spline structure
0672   NUBspline_3d_d* spline = static_cast<NUBspline_3d_d*>(malloc(sizeof(NUBspline_3d_d)));
0673   if (spline == NULL)
0674     return spline;
0675   spline->sp_code = NU3D;
0676   spline->t_code  = DOUBLE_REAL;
0677 
0678   // Next, create the bases
0679   spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
0680   spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
0681   spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC);
0682 
0683   // set but unused
0684   //int Mx,
0685   int My, Mz, Nx, Ny, Nz;
0686 //  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
0687 //  else                       Mx = x_grid->num_points;
0688   if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
0689   else                       My = y_grid->num_points;
0690   if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1;
0691   else                       Mz = z_grid->num_points;
0692 
0693   Nx = x_grid->num_points + 2;
0694   Ny = y_grid->num_points + 2;
0695   Nz = z_grid->num_points + 2;
0696   
0697   spline->x_stride = Ny*Nz;
0698   spline->y_stride = Nz;
0699 #ifndef HAVE_SSE2
0700   spline->coefs = (double*)malloc (sizeof(double)*Nx*Ny*Nz);
0701 #else
0702   posix_memalign ((void**)&spline->coefs, 16, sizeof(double)*Nx*Ny*Nz);
0703 #endif
0704 
0705   // First, solve in the X-direction 
0706   for (int iy=0; iy<My; iy++) 
0707     for (int iz=0; iz<Mz; iz++) {
0708       int doffset = iy*Mz+iz;
0709       int coffset = iy*Nz+iz;
0710       find_NUBcoefs_1d_d (spline->x_basis, xBC, data+doffset, My*Mz,
0711               spline->coefs+coffset, Ny*Nz);
0712     }
0713   
0714   // Now, solve in the Y-direction
0715   for (int ix=0; ix<Nx; ix++) 
0716     for (int iz=0; iz<Nz; iz++) {
0717       int doffset = ix*Ny*Nz + iz;
0718       int coffset = ix*Ny*Nz + iz;
0719       find_NUBcoefs_1d_d (spline->y_basis, yBC, spline->coefs+doffset, Nz, 
0720               spline->coefs+coffset, Nz);
0721     }
0722 
0723   // Now, solve in the Z-direction
0724   for (int ix=0; ix<Nx; ix++) 
0725     for (int iy=0; iy<Ny; iy++) {
0726       int doffset = (ix*Ny+iy)*Nz;
0727       int coffset = (ix*Ny+iy)*Nz;
0728       find_NUBcoefs_1d_d (spline->z_basis, zBC, spline->coefs+doffset, 1, 
0729               spline->coefs+coffset, 1);
0730     }
0731   return spline;
0732 }
0733 
0734 
0735 ////////////////////////////////////////////////////////
0736 ////////////////////////////////////////////////////////
0737 //// Single-precision complex creation routines     ////
0738 ////////////////////////////////////////////////////////
0739 ////////////////////////////////////////////////////////
0740 
0741 void
0742 find_NUBcoefs_1d_c (NUBasis* restrict basis, BCtype_c bc,
0743             complex_float *data,  int dstride,
0744             complex_float *coefs, int cstride)
0745 {
0746   BCtype_s bc_r, bc_i;
0747   bc_r.lCode = bc.lCode;   bc_i.lCode = bc.lCode;
0748   bc_r.rCode = bc.rCode;   bc_i.rCode = bc.rCode;
0749   bc_r.lVal  = bc.lVal_r;  bc_r.rVal  = bc.rVal_r;
0750   bc_i.lVal  = bc.lVal_i;  bc_i.rVal  = bc.rVal_i;
0751 
0752   float *data_r  = ((float*)data );
0753   float *data_i  = ((float*)data )+1;
0754   float *coefs_r = ((float*)coefs);
0755   float *coefs_i = ((float*)coefs)+1;
0756 
0757   find_NUBcoefs_1d_s (basis, bc_r, data_r, 2*dstride, coefs_r, 2*cstride);
0758   find_NUBcoefs_1d_s (basis, bc_i, data_i, 2*dstride, coefs_i, 2*cstride);
0759 }
0760 
0761 
0762 NUBspline_1d_c *
0763 create_NUBspline_1d_c (NUgrid* x_grid, BCtype_c xBC, complex_float *data)
0764 {
0765   // First, create the spline structure
0766   NUBspline_1d_c* spline = static_cast<NUBspline_1d_c*>(malloc(sizeof(NUBspline_1d_c)));
0767   if (spline == NULL)
0768     return spline;
0769   spline->sp_code = NU1D;
0770   spline->t_code  = SINGLE_COMPLEX;
0771 
0772   // Next, create the basis
0773   spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
0774   // M is the number of data points
0775 //  int M;
0776 //  if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1;
0777 //  else                       M = x_grid->num_points;
0778   int N = x_grid->num_points + 2;
0779 
0780   // Allocate coefficients and solve  
0781   spline->coefs = (complex_float*)malloc (sizeof(complex_float)*N);
0782   find_NUBcoefs_1d_c (spline->x_basis, xBC, data, 1, spline->coefs, 1);
0783     
0784   return spline;
0785 }
0786 
0787 NUBspline_2d_c *
0788 create_NUBspline_2d_c (NUgrid* x_grid, NUgrid* y_grid,
0789                BCtype_c xBC, BCtype_c yBC, complex_float *data)
0790 {
0791   // First, create the spline structure
0792   NUBspline_2d_c* spline = static_cast<NUBspline_2d_c*>(malloc(sizeof(NUBspline_2d_c)));
0793   if (spline == NULL)
0794     return spline;
0795   spline->sp_code = NU2D;
0796   spline->t_code  = SINGLE_COMPLEX;
0797 
0798   // Next, create the bases
0799   spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
0800   spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
0801 //  int Mx,
0802   int My, Nx, Ny;
0803 //  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
0804 //  else                       Mx = x_grid->num_points;
0805   if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
0806   else                       My = y_grid->num_points;
0807 
0808   Nx = x_grid->num_points + 2;
0809   Ny = y_grid->num_points + 2;
0810     
0811   spline->x_stride = Ny;
0812 #ifndef HAVE_SSE2
0813   spline->coefs = (complex_float*)malloc (sizeof(complex_float)*Nx*Ny);
0814 #else
0815   posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_float)*Nx*Ny);
0816 #endif
0817 
0818   // First, solve in the X-direction 
0819   for (int iy=0; iy<My; iy++) {
0820     int doffset = iy;
0821     int coffset = iy;
0822     find_NUBcoefs_1d_c (spline->x_basis, xBC, data+doffset, My,
0823             spline->coefs+coffset, Ny);
0824   }
0825   
0826   // Now, solve in the Y-direction
0827   for (int ix=0; ix<Nx; ix++) {
0828     int doffset = ix*Ny;
0829     int coffset = ix*Ny;
0830     find_NUBcoefs_1d_c (spline->y_basis, yBC, spline->coefs+doffset, 1, 
0831             spline->coefs+coffset, 1);
0832   }
0833     
0834   return spline;
0835 }
0836 
0837 
0838 NUBspline_3d_c *
0839 create_NUBspline_3d_c (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
0840                BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, complex_float *data)
0841 {
0842   // First, create the spline structure
0843   NUBspline_3d_c* spline = static_cast<NUBspline_3d_c*>(malloc(sizeof(NUBspline_3d_c)));
0844   if (spline == NULL)
0845     return spline;
0846   spline->sp_code = NU3D;
0847   spline->t_code  = SINGLE_COMPLEX;
0848 
0849   // Next, create the bases
0850   spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
0851   spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
0852   spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC);
0853 //  int Mx,
0854   int My, Mz, Nx, Ny, Nz;
0855 //  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
0856 //  else                       Mx = x_grid->num_points;
0857   if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
0858   else                       My = y_grid->num_points;
0859   if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1;
0860   else                       Mz = z_grid->num_points;
0861 
0862   Nx = x_grid->num_points + 2;
0863   Ny = y_grid->num_points + 2;
0864   Nz = z_grid->num_points + 2;
0865 
0866   // Allocate coefficients and solve  
0867   spline->x_stride = Ny*Nz;
0868   spline->y_stride = Nz;
0869 #ifndef HAVE_SSE2
0870   spline->coefs = (complex_float*)malloc (sizeof(complex_float)*Nx*Ny*Nz);
0871 #else
0872   posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_float)*Nx*Ny*Nz);
0873 #endif
0874 
0875   // First, solve in the X-direction 
0876   for (int iy=0; iy<My; iy++) 
0877     for (int iz=0; iz<Mz; iz++) {
0878       int doffset = iy*Mz+iz;
0879       int coffset = iy*Nz+iz;
0880       find_NUBcoefs_1d_c (spline->x_basis, xBC, data+doffset, My*Mz,
0881               spline->coefs+coffset, Ny*Nz);
0882     }
0883   
0884   // Now, solve in the Y-direction
0885   for (int ix=0; ix<Nx; ix++) 
0886     for (int iz=0; iz<Nz; iz++) {
0887       int doffset = ix*Ny*Nz + iz;
0888       int coffset = ix*Ny*Nz + iz;
0889       find_NUBcoefs_1d_c (spline->y_basis, yBC, spline->coefs+doffset, Nz, 
0890               spline->coefs+coffset, Nz);
0891     }
0892 
0893   // Now, solve in the Z-direction
0894   for (int ix=0; ix<Nx; ix++) 
0895     for (int iy=0; iy<Ny; iy++) {
0896       int doffset = (ix*Ny+iy)*Nz;
0897       int coffset = (ix*Ny+iy)*Nz;
0898       find_NUBcoefs_1d_c (spline->z_basis, zBC, spline->coefs+doffset, 1, 
0899               spline->coefs+coffset, 1);
0900     }
0901   return spline;
0902 }
0903 
0904 ////////////////////////////////////////////////////////
0905 ////////////////////////////////////////////////////////
0906 //// Double-precision complex creation routines     ////
0907 ////////////////////////////////////////////////////////
0908 ////////////////////////////////////////////////////////
0909 
0910 void
0911 find_NUBcoefs_1d_z (NUBasis* restrict basis, BCtype_z bc,
0912             complex_double *data,  int dstride,
0913             complex_double *coefs, int cstride)
0914 {
0915   BCtype_d bc_r, bc_i;
0916   bc_r.lCode = bc.lCode;   bc_i.lCode = bc.lCode;
0917   bc_r.rCode = bc.rCode;   bc_i.rCode = bc.rCode;
0918   bc_r.lVal  = bc.lVal_r;  bc_r.rVal  = bc.rVal_r;
0919   bc_i.lVal  = bc.lVal_i;  bc_i.rVal  = bc.rVal_i;
0920 
0921   double *data_r  = ((double*)data );
0922   double *data_i  = ((double*)data )+1;
0923   double *coefs_r = ((double*)coefs);
0924   double *coefs_i = ((double*)coefs)+1;
0925 
0926   find_NUBcoefs_1d_d (basis, bc_r, data_r, 2*dstride, coefs_r, 2*cstride);
0927   find_NUBcoefs_1d_d (basis, bc_i, data_i, 2*dstride, coefs_i, 2*cstride);
0928 }
0929 
0930 
0931 NUBspline_1d_z *
0932 create_NUBspline_1d_z (NUgrid* x_grid, BCtype_z xBC, complex_double *data)
0933 {
0934   // First, create the spline structure
0935   NUBspline_1d_z* spline = static_cast<NUBspline_1d_z*>(malloc(sizeof(NUBspline_1d_z)));
0936   if (spline == NULL)
0937     return spline;
0938   spline->sp_code = NU1D;
0939   spline->t_code  = DOUBLE_COMPLEX;
0940 
0941   // Next, create the basis
0942   spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
0943   // M is the number of data points
0944 //  int M;
0945 //  if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1;
0946 //  else                       M = x_grid->num_points;
0947   int N = x_grid->num_points + 2;
0948 
0949   // Allocate coefficients and solve  
0950   spline->coefs = (complex_double*)malloc (sizeof(complex_double)*N);
0951   find_NUBcoefs_1d_z (spline->x_basis, xBC, data, 1, spline->coefs, 1);
0952     
0953   return spline;
0954 }
0955 
0956 NUBspline_2d_z *
0957 create_NUBspline_2d_z (NUgrid* x_grid, NUgrid* y_grid,
0958                BCtype_z xBC, BCtype_z yBC, complex_double *data)
0959 {
0960   // First, create the spline structure
0961   NUBspline_2d_z* spline = static_cast<NUBspline_2d_z*>(malloc(sizeof(NUBspline_2d_z)));
0962   if (spline == NULL)
0963     return spline;
0964   spline->sp_code = NU2D;
0965   spline->t_code  = DOUBLE_COMPLEX;
0966 
0967   // Next, create the bases
0968   spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
0969   spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
0970 //  int Mx,
0971   int My, Nx, Ny;
0972 //  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
0973 //  else                       Mx = x_grid->num_points;
0974   if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
0975   else                       My = y_grid->num_points;
0976 
0977   Nx = x_grid->num_points + 2;
0978   Ny = y_grid->num_points + 2;
0979     
0980   spline->x_stride = Ny;
0981 #ifndef HAVE_SSE2
0982   spline->coefs = (complex_double*)malloc (sizeof(complex_double)*Nx*Ny);
0983 #else
0984   posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_double)*Nx*Ny);
0985 #endif
0986 
0987   // First, solve in the X-direction 
0988   for (int iy=0; iy<My; iy++) {
0989     int doffset = iy;
0990     int coffset = iy;
0991     find_NUBcoefs_1d_z (spline->x_basis, xBC, data+doffset, My,
0992             spline->coefs+coffset, Ny);
0993   }
0994   
0995   // Now, solve in the Y-direction
0996   for (int ix=0; ix<Nx; ix++) {
0997     int doffset = ix*Ny;
0998     int coffset = ix*Ny;
0999     find_NUBcoefs_1d_z (spline->y_basis, yBC, spline->coefs+doffset, 1, 
1000             spline->coefs+coffset, 1);
1001   }
1002     
1003   return spline;
1004 }
1005 
1006 
1007 NUBspline_3d_z *
1008 create_NUBspline_3d_z (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
1009                BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, complex_double *data)
1010 {
1011   // First, create the spline structure
1012   NUBspline_3d_z* spline = static_cast<NUBspline_3d_z*>(malloc(sizeof(NUBspline_3d_z)));
1013   if (spline == NULL)
1014     return spline;
1015   spline->sp_code = NU3D;
1016   spline->t_code  = DOUBLE_COMPLEX;
1017   spline->x_grid = x_grid;
1018   spline->y_grid = y_grid;
1019   spline->z_grid = z_grid;
1020 
1021   // Next, create the bases
1022   spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
1023   spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
1024   spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC);
1025 //  int Mx,
1026   int My, Mz, Nx, Ny, Nz;
1027 //  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
1028 //  else                       Mx = x_grid->num_points;
1029   if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
1030   else                       My = y_grid->num_points;
1031   if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1;
1032   else                       Mz = z_grid->num_points;
1033 
1034   Nx = x_grid->num_points + 2;
1035   Ny = y_grid->num_points + 2;
1036   Nz = z_grid->num_points + 2;
1037 
1038   // Allocate coefficients and solve  
1039   spline->x_stride = Ny*Nz;
1040   spline->y_stride = Nz;
1041 #ifndef HAVE_SSE2
1042   spline->coefs = (complex_double*)malloc (sizeof(complex_double)*Nx*Ny*Nz);
1043 #else
1044   posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_double)*Nx*Ny*Nz);
1045 #endif
1046 
1047   // First, solve in the X-direction 
1048   for (int iy=0; iy<My; iy++) 
1049     for (int iz=0; iz<Mz; iz++) {
1050       int doffset = iy*Mz+iz;
1051       int coffset = iy*Nz+iz;
1052       find_NUBcoefs_1d_z (spline->x_basis, xBC, data+doffset, My*Mz,
1053               spline->coefs+coffset, Ny*Nz);
1054       /* for (int ix=0; ix<Nx; ix++) {
1055     complex_double z = spline->coefs[coffset+ix*spline->x_stride];
1056     if (isnan(creal(z)))
1057       fprintf (stderr, "NAN encountered in create_NUBspline_3d_z at real part of (%d,%d,%d)\n",
1058            ix,iy,iz);
1059     if (isnan(cimag(z)))
1060       fprintf (stderr, "NAN encountered in create_NUBspline_3d_z at imag part of (%d,%d,%d)\n",
1061            ix,iy,iz);
1062        } */
1063     }
1064   
1065   // Now, solve in the Y-direction
1066   for (int ix=0; ix<Nx; ix++) 
1067     for (int iz=0; iz<Nz; iz++) {
1068       int doffset = ix*Ny*Nz + iz;
1069       int coffset = ix*Ny*Nz + iz;
1070       find_NUBcoefs_1d_z (spline->y_basis, yBC, spline->coefs+doffset, Nz, 
1071               spline->coefs+coffset, Nz);
1072     }
1073 
1074   // Now, solve in the Z-direction
1075   for (int ix=0; ix<Nx; ix++) 
1076     for (int iy=0; iy<Ny; iy++) {
1077       int doffset = (ix*Ny+iy)*Nz;
1078       int coffset = (ix*Ny+iy)*Nz;
1079       find_NUBcoefs_1d_z (spline->z_basis, zBC, spline->coefs+doffset, 1, 
1080               spline->coefs+coffset, 1);
1081     }
1082   return spline;
1083 }
1084 
1085 
1086 void
1087 destroy_NUBspline(Bspline *spline)
1088 {
1089     free(spline->coefs);
1090   switch (spline->sp_code) {
1091   case NU1D:
1092     destroy_NUBasis (((NUBspline_1d*)spline)->x_basis);
1093     break;
1094   case NU2D:
1095     destroy_NUBasis (((NUBspline_2d*)spline)->x_basis);
1096     destroy_NUBasis (((NUBspline_2d*)spline)->y_basis);
1097     break;
1098     
1099   case NU3D:
1100     destroy_NUBasis (((NUBspline_3d*)spline)->x_basis);
1101     destroy_NUBasis (((NUBspline_3d*)spline)->y_basis);
1102     destroy_NUBasis (((NUBspline_3d*)spline)->z_basis);
1103     break;
1104   case U1D:
1105   case U2D:
1106   case MULTI_U1D:
1107   case MULTI_U2D:
1108   case MULTI_U3D:
1109   case MULTI_NU1D:
1110   case MULTI_NU2D:
1111   case MULTI_NU3D:
1112   default:
1113       ;
1114   }
1115   free(spline);
1116 }
1117