File indexing completed on 2024-12-22 04:09:45

0001 /////////////////////////////////////////////////////////////////////////////
0002 //  einspline:  a library for creating and evaluating B-splines            //
0003 //  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
0004 //                                                                         //
0005 //  This program is free software; you can redistribute it and/or modify   //
0006 //  it under the terms of the GNU General Public License as published by   //
0007 //  the Free Software Foundation; either version 2 of the License, or      //
0008 //  (at your option) any later version.                                    //
0009 //                                                                         //
0010 //  This program is distributed in the hope that it will be useful,        //
0011 //  but WITHOUT ANY WARRANTY; without even the implied warranty of         //
0012 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
0013 //  GNU General Public License for more details.                           //
0014 //                                                                         //
0015 //  You should have received a copy of the GNU General Public License      //
0016 //  along with this program; if not, write to the Free Software            //
0017 //  Foundation, Inc., 51 Franklin Street, Fifth Floor,                     //
0018 //  Boston, MA  02110-1301  USA                                            //
0019 /////////////////////////////////////////////////////////////////////////////
0020 
0021 #include "multi_bspline_create.h"
0022 #ifndef _XOPEN_SOURCE
0023 #define _XOPEN_SOURCE 600
0024 #endif
0025 #ifndef __USE_XOPEN2K
0026   #define __USE_XOPEN2K
0027 #endif
0028 #include <stdlib.h>
0029 #include <stdio.h>
0030 
0031 int posix_memalign(void **memptr, size_t alignment, size_t size);
0032 
0033 ////////////////////////////////////////////////////////////
0034 ////////////////////////////////////////////////////////////
0035 ////       Helper functions for spline creation         ////
0036 ////////////////////////////////////////////////////////////
0037 ////////////////////////////////////////////////////////////
0038 void init_sse_data();
0039 
0040 void
0041 find_coefs_1d_d (Ugrid grid, BCtype_d bc, 
0042          double *data,  intptr_t dstride,
0043          double *coefs, intptr_t cstride);
0044 
0045 void 
0046 solve_deriv_interp_1d_s (float bands[], float coefs[],
0047              int M, int cstride);
0048 
0049 // On input, bands should be filled with:
0050 // row 0   :  abcdInitial from boundary conditions
0051 // rows 1:M:  basis functions in first 3 cols, data in last
0052 // row M+1 :  abcdFinal   from boundary conditions
0053 // cstride gives the stride between values in coefs.
0054 // On exit, coefs with contain interpolating B-spline coefs
0055 void 
0056 solve_periodic_interp_1d_s (float bands[], float coefs[],
0057                 int M, int cstride);
0058 
0059 // On input, bands should be filled with:
0060 // row 0   :  abcdInitial from boundary conditions
0061 // rows 1:M:  basis functions in first 3 cols, data in last
0062 // row M+1 :  abcdFinal   from boundary conditions
0063 // cstride gives the stride between values in coefs.
0064 // On exit, coefs with contain interpolating B-spline coefs
0065 void 
0066 solve_antiperiodic_interp_1d_s (float bands[], float coefs[],
0067                 int M, int cstride);
0068 
0069 void
0070 find_coefs_1d_s (Ugrid grid, BCtype_s bc, 
0071          float *data,  intptr_t dstride,
0072          float *coefs, intptr_t cstride);
0073 
0074 ////////////////////////////////////////////////////////////
0075 ////////////////////////////////////////////////////////////
0076 ////     Single-Precision, Real Creation Routines       ////
0077 ////////////////////////////////////////////////////////////
0078 ////////////////////////////////////////////////////////////
0079 
0080 // On input, bands should be filled with:
0081 // row 0   :  abcdInitial from boundary conditions
0082 // rows 1:M:  basis functions in first 3 cols, data in last
0083 // row M+1 :  abcdFinal   from boundary conditions
0084 // cstride gives the stride between values in coefs.
0085 // On exit, coefs with contain interpolating B-spline coefs
0086 multi_UBspline_1d_s*
0087 create_multi_UBspline_1d_s (Ugrid x_grid, BCtype_s xBC, int num_splines)
0088 {
0089   // Create new spline
0090   multi_UBspline_1d_s* restrict spline = static_cast<multi_UBspline_1d_s*>(malloc(sizeof(multi_UBspline_1d_s)));
0091   if (!spline) {
0092     fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_1d_s.\n");
0093     abort();
0094   }
0095   spline->spcode = MULTI_U1D;
0096   spline->tcode  = SINGLE_REAL;
0097   spline->xBC = xBC; spline->x_grid = x_grid;
0098   spline->num_splines = num_splines;
0099 
0100   // Setup internal variables
0101   int Mx = x_grid.num;
0102   int Nx;
0103 
0104   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
0105     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num);
0106     Nx = Mx+3;
0107   }
0108   else {
0109     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
0110     Nx = Mx+2;
0111   }
0112 
0113   int N = num_splines;
0114 #ifdef HAVE_SSE
0115   if (N % 4) 
0116     N += 4 - (N % 4);
0117 #endif 
0118 
0119   spline->x_stride = N;
0120   x_grid.delta_inv = 1.0/x_grid.delta;
0121   spline->x_grid   = x_grid;
0122 #ifndef HAVE_POSIX_MEMALIGN
0123   spline->coefs = (float*)malloc (sizeof(float)*Nx*N);
0124 #else
0125   posix_memalign ((void**)&spline->coefs, 64, (sizeof(float)*Nx*N));
0126 #endif
0127 #ifdef HAVE_SSE
0128   init_sse_data();    
0129 #endif
0130   if (!spline->coefs) {
0131     fprintf (stderr, "Out of memory allocating spline coefficient in create_multi_UBspline_1d_s.\n");
0132     abort();
0133   }
0134 
0135 
0136   return spline;
0137 }
0138 
0139 void
0140 set_multi_UBspline_1d_s (multi_UBspline_1d_s *spline, int num,
0141              float *data)
0142 {
0143   float *coefs = spline->coefs + num;
0144   int xs = spline->x_stride;
0145   find_coefs_1d_s (spline->x_grid, spline->xBC, data, 1, 
0146            coefs, xs);
0147 }
0148 
0149 
0150 multi_UBspline_2d_s*
0151 create_multi_UBspline_2d_s (Ugrid x_grid, Ugrid y_grid,
0152                 BCtype_s xBC, BCtype_s yBC, int num_splines)
0153 {
0154   // Create new spline
0155   multi_UBspline_2d_s* restrict spline = static_cast<multi_UBspline_2d_s*>(malloc(sizeof(multi_UBspline_2d_s)));
0156   if (!spline) {
0157     fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_2d_s.\n");
0158     abort();
0159   }
0160   spline->spcode = MULTI_U2D;
0161   spline->tcode  = SINGLE_REAL;
0162   spline->xBC = xBC; 
0163   spline->yBC = yBC; 
0164   spline->num_splines = num_splines;
0165   // Setup internal variables
0166   int Mx = x_grid.num;
0167   int My = y_grid.num;
0168   int Nx, Ny;
0169 
0170   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     
0171     Nx = Mx+3;
0172   else                           
0173     Nx = Mx+2;
0174   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
0175 
0176   x_grid.delta_inv = 1.0/x_grid.delta;
0177   spline->x_grid   = x_grid;
0178 
0179   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     
0180     Ny = My+3;
0181   else                           
0182     Ny = My+2;
0183   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
0184   y_grid.delta_inv = 1.0/y_grid.delta;
0185   spline->y_grid   = y_grid;
0186 
0187   int N = num_splines;
0188 #ifdef HAVE_SSE
0189   if (N % 4) 
0190     N += 4 - (N % 4);
0191 #endif
0192 
0193 
0194   spline->x_stride = Ny*N;
0195   spline->y_stride = N;
0196 
0197 #ifndef HAVE_POSIX_MEMALIGN
0198   spline->coefs = (float*)malloc (sizeof(float)*Nx*Ny*N);
0199 #else
0200   posix_memalign ((void**)&spline->coefs, 64, 
0201           sizeof(float)*Nx*Ny*N);
0202 #endif
0203 #ifdef HAVE_SSE
0204   init_sse_data();
0205 #endif
0206   if (!spline->coefs) {
0207     fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_2d_s.\n");
0208     abort();
0209   }
0210 
0211   return spline;
0212 }
0213 
0214 void
0215 set_multi_UBspline_2d_s (multi_UBspline_2d_s* spline, int num, float *data)
0216 {
0217   int Mx = spline->x_grid.num;
0218   int My = spline->y_grid.num;
0219   int Nx, Ny;
0220 
0221   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     
0222     Nx = Mx+3;
0223   else                                   
0224     Nx = Mx+2;
0225   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     
0226     Ny = My+3;
0227   else                                   
0228     Ny = My+2;
0229 
0230   float *coefs = spline->coefs + num;
0231   int ys = spline->y_stride;
0232   // First, solve in the X-direction 
0233   for (int iy=0; iy<My; iy++) {
0234     intptr_t doffset = iy;
0235     intptr_t coffset = iy*ys;
0236     find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, (intptr_t)My,
0237              coefs+coffset, (intptr_t)Ny*ys);
0238   }
0239   
0240   // Now, solve in the Y-direction
0241   for (int ix=0; ix<Nx; ix++) {
0242     intptr_t doffset = ix*Ny*ys;
0243     intptr_t coffset = ix*Ny*ys;
0244     find_coefs_1d_s (spline->y_grid, spline->yBC, coefs+doffset, (intptr_t)ys, 
0245              coefs+coffset, (intptr_t)ys);
0246   }
0247 }
0248 
0249 
0250 multi_UBspline_3d_s*
0251 create_multi_UBspline_3d_s (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
0252                 BCtype_s xBC, BCtype_s yBC, BCtype_s zBC,
0253                 int num_splines)
0254 {
0255   // Create new spline
0256   multi_UBspline_3d_s* restrict spline = static_cast<multi_UBspline_3d_s*>(malloc(sizeof(multi_UBspline_3d_s)));
0257   if (!spline) {
0258     fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_3d_s.\n");
0259     abort();
0260   }
0261   spline->spcode = MULTI_U3D;
0262   spline->tcode  = SINGLE_REAL;
0263   spline->xBC = xBC; 
0264   spline->yBC = yBC; 
0265   spline->zBC = zBC; 
0266   spline->num_splines = num_splines;
0267   // Setup internal variables
0268   int Mx = x_grid.num;  int My = y_grid.num; int Mz = z_grid.num;
0269   int Nx, Ny, Nz;
0270 
0271   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     
0272     Nx = Mx+3;
0273   else                           
0274     Nx = Mx+2;
0275   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
0276   x_grid.delta_inv = 1.0/x_grid.delta;
0277   spline->x_grid   = x_grid;
0278 
0279   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     
0280     Ny = My+3;
0281   else                           
0282     Ny = My+2;
0283   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
0284   y_grid.delta_inv = 1.0/y_grid.delta;
0285   spline->y_grid   = y_grid;
0286 
0287   if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC)     
0288     Nz = Mz+3;
0289   else                           
0290     Nz = Mz+2;
0291   z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
0292   z_grid.delta_inv = 1.0/z_grid.delta;
0293   spline->z_grid   = z_grid;
0294 
0295   int N = num_splines;
0296 #ifdef HAVE_SSE
0297   if (N % 4) 
0298     N += 4 - (N % 4);
0299 #endif
0300 
0301   spline->x_stride      = Ny*Nz*N;
0302   spline->y_stride      = Nz*N;
0303   spline->z_stride      = N;
0304 
0305 #ifndef HAVE_POSIX_MEMALIGN
0306   spline->coefs      = new float[Nx*Ny*Nz*N];
0307 #else
0308   posix_memalign ((void**)&spline->coefs, 64, 
0309           (sizeof(float)*Nx*Ny*Nz*N));
0310 #endif
0311 #ifdef HAVE_SSE
0312   init_sse_data();
0313 #endif
0314   if (!spline->coefs) {
0315     fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_3d_s.\n");
0316     abort();
0317   }
0318 
0319   return spline;
0320 }
0321 
0322 void
0323 set_multi_UBspline_3d_s (multi_UBspline_3d_s* spline, int num, float *data)
0324 {
0325   int Mx = spline->x_grid.num;
0326   int My = spline->y_grid.num;
0327   int Mz = spline->z_grid.num;
0328   int Nx, Ny, Nz;
0329 
0330   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     
0331     Nx = Mx+3;
0332   else                                   
0333     Nx = Mx+2;
0334   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     
0335     Ny = My+3;
0336   else                                   
0337     Ny = My+2;
0338   if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC)     
0339     Nz = Mz+3;
0340   else                                   
0341     Nz = Mz+2;
0342 
0343   float *coefs = spline->coefs + num;
0344 
0345   int zs = spline->z_stride;
0346   // First, solve in the X-direction 
0347   for (int iy=0; iy<My; iy++) 
0348     for (int iz=0; iz<Mz; iz++) {
0349       intptr_t doffset = iy*Mz+iz;
0350       intptr_t coffset = (iy*Nz+iz)*zs;
0351       find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, (intptr_t)My*Mz,
0352                coefs+coffset, (intptr_t)Ny*Nz*zs);
0353     }
0354   
0355   // Now, solve in the Y-direction
0356   for (int ix=0; ix<Nx; ix++) 
0357     for (int iz=0; iz<Nz; iz++) {
0358       intptr_t doffset = (ix*Ny*Nz + iz)*zs;
0359       intptr_t coffset = (ix*Ny*Nz + iz)*zs;
0360       find_coefs_1d_s (spline->y_grid, spline->yBC, coefs+doffset, (intptr_t)Nz*zs, 
0361                coefs+coffset, (intptr_t)Nz*zs);
0362     }
0363 
0364   // Now, solve in the Z-direction
0365   for (int ix=0; ix<Nx; ix++) 
0366     for (int iy=0; iy<Ny; iy++) {
0367       intptr_t doffset = ((ix*Ny+iy)*Nz)*zs;
0368       intptr_t coffset = ((ix*Ny+iy)*Nz)*zs;
0369       find_coefs_1d_s (spline->z_grid, spline->zBC, coefs+doffset, 
0370                (intptr_t)zs, coefs+coffset, (intptr_t)zs);
0371     }
0372 }
0373 
0374 
0375 ////////////////////////////////////////////////////////////
0376 ////////////////////////////////////////////////////////////
0377 ////    Single-Precision, Complex Creation Routines     ////
0378 ////////////////////////////////////////////////////////////
0379 ////////////////////////////////////////////////////////////
0380 
0381 // On input, bands should be filled with:
0382 // row 0   :  abcdInitial from boundary conditions
0383 // rows 1:M:  basis functions in first 3 cols, data in last
0384 // row M+1 :  abcdFinal   from boundary conditions
0385 // cstride gives the stride between values in coefs.
0386 // On exit, coefs with contain interpolating B-spline coefs
0387 multi_UBspline_1d_c*
0388 create_multi_UBspline_1d_c (Ugrid x_grid, BCtype_c xBC, int num_splines)
0389 {
0390   // Create new spline
0391   multi_UBspline_1d_c* restrict spline = static_cast<multi_UBspline_1d_c*>(malloc(sizeof(multi_UBspline_1d_c)));
0392   if (!spline) {
0393     fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_1d_c.\n");
0394     abort();
0395   }
0396   spline->spcode = MULTI_U1D;
0397   spline->tcode  = SINGLE_COMPLEX;
0398   spline->xBC = xBC; 
0399   spline->num_splines = num_splines;
0400   // Setup internal variables
0401   int M = x_grid.num;
0402   int N;
0403 
0404   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
0405     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num);
0406     N = M+3;
0407   }
0408   else {
0409     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
0410     N = M+2;
0411   }
0412 
0413   x_grid.delta_inv = 1.0/x_grid.delta;
0414   spline->x_grid   = x_grid;
0415   spline->x_stride = num_splines;
0416 #ifndef HAVE_POSIX_MEMALIGN
0417   spline->coefs = (complex_float*)malloc (2*sizeof(float)*N*num_splines);
0418 #else
0419   posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(float)*N*num_splines);
0420 #endif
0421 #ifdef HAVE_SSE
0422   init_sse_data();    
0423 #endif
0424   if (!spline->coefs) {
0425     fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_1d_c.\n");
0426     abort();
0427   }
0428 
0429   return spline;
0430 }
0431 
0432 void
0433 set_multi_UBspline_1d_c (multi_UBspline_1d_c* spline, int num, complex_float *data)
0434 {
0435   complex_float *coefs = spline->coefs + num;
0436 
0437   BCtype_s xBC_r, xBC_i;
0438   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
0439   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
0440   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
0441   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
0442 
0443   int xs = spline->x_stride;
0444   // Real part
0445   find_coefs_1d_s (spline->x_grid, xBC_r, 
0446            (float*)data, (intptr_t)2, (float*)coefs, (intptr_t)2*xs);
0447   // Imaginarty part
0448   find_coefs_1d_s (spline->x_grid, xBC_i, 
0449            ((float*)data)+1, (intptr_t)2, ((float*)coefs+1), (intptr_t)2*xs);
0450 }
0451 
0452 
0453 
0454 multi_UBspline_2d_c*
0455 create_multi_UBspline_2d_c (Ugrid x_grid, Ugrid y_grid,
0456                 BCtype_c xBC, BCtype_c yBC, int num_splines)
0457 {
0458   // Create new spline
0459   multi_UBspline_2d_c* restrict spline = static_cast<multi_UBspline_2d_c*>(malloc(sizeof(multi_UBspline_2d_c)));
0460   if (!spline) {
0461     fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_2d_c.\n");
0462     abort();
0463   }
0464   spline->spcode = MULTI_U2D;
0465   spline->tcode  = SINGLE_COMPLEX;
0466   spline->xBC = xBC; 
0467   spline->yBC = yBC;
0468   spline->num_splines = num_splines;
0469 
0470   // Setup internal variables
0471   int Mx = x_grid.num;
0472   int My = y_grid.num;
0473   int Nx, Ny;
0474 
0475   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     
0476     Nx = Mx+3;
0477   else                           
0478     Nx = Mx+2;
0479   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
0480   x_grid.delta_inv = 1.0/x_grid.delta;
0481   spline->x_grid   = x_grid;
0482 
0483   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     
0484     Ny = My+3;
0485   else                           
0486     Ny = My+2;
0487   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
0488   y_grid.delta_inv = 1.0/y_grid.delta;
0489   spline->y_grid   = y_grid;
0490 
0491   int N = num_splines;
0492 #ifdef HAVE_SSE
0493   if (N % 2)
0494     N++;
0495 #endif
0496 
0497   spline->x_stride = Ny*N;
0498   spline->y_stride = N;
0499 
0500 #ifndef HAVE_POSIX_MEMALIGN
0501   spline->coefs = (complex_float*)malloc (2*sizeof(float)*Nx*Ny*N);
0502   spline->lapl2 = (complex_float*)malloc (4*sizeof(float)*N);
0503 #else
0504   posix_memalign ((void**)&spline->coefs, 64, 
0505           2*sizeof(float)*Nx*Ny*N);
0506   posix_memalign ((void**)&spline->lapl2, 64,
0507           4*sizeof(float)*N);
0508 #endif
0509 #ifdef HAVE_SSE
0510   init_sse_data();
0511 #endif
0512   if (!spline->coefs || !spline->lapl2) {
0513     fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_2d_c.\n");
0514     abort();
0515   }
0516   return spline;
0517 }
0518 
0519 
0520 void
0521 set_multi_UBspline_2d_c (multi_UBspline_2d_c* spline, int num, complex_float *data)
0522 {
0523   // Setup internal variables
0524   int Mx = spline->x_grid.num;
0525   int My = spline->y_grid.num;
0526   int Nx, Ny;
0527 
0528   complex_float* coefs = spline->coefs + num;
0529 
0530   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     
0531     Nx = Mx+3;
0532   else                                   
0533     Nx = Mx+2;
0534   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     
0535     Ny = My+3;
0536   else                                   
0537     Ny = My+2;
0538 
0539   BCtype_s xBC_r, xBC_i, yBC_r, yBC_i;
0540   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
0541   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
0542   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
0543   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
0544   yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
0545   yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
0546   yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
0547   yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
0548  
0549   int ys = spline->y_stride;
0550   // First, solve in the X-direction 
0551   for (int iy=0; iy<My; iy++) {
0552     intptr_t doffset = (2*iy);
0553     intptr_t coffset = (2*iy)*ys;
0554     // Real part
0555     find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, (intptr_t)2*My,
0556              (float*)coefs+coffset, (intptr_t)2*Ny*ys);
0557     // Imag part
0558     find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, (intptr_t)2*My,
0559              ((float*)coefs)+coffset+1, (intptr_t)2*Ny*ys);
0560   }
0561   
0562   // Now, solve in the Y-direction
0563   for (int ix=0; ix<Nx; ix++) {
0564     intptr_t doffset = (2*ix*Ny)*ys;
0565     intptr_t coffset = (2*ix*Ny)*ys;
0566     // Real part
0567     find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)coefs)+doffset, 
0568              (intptr_t)2*ys, ((float*)coefs)+coffset, (intptr_t)2*ys);
0569     // Imag part
0570     find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)coefs)+doffset+1, 
0571              (intptr_t)2*ys, ((float*)coefs)+coffset+1, (intptr_t)2*ys);
0572   }  
0573 }
0574 
0575 multi_UBspline_3d_c*
0576 create_multi_UBspline_3d_c (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
0577               BCtype_c xBC, BCtype_c yBC, BCtype_c zBC,
0578               int num_splines)
0579 {
0580   // Create new spline
0581   multi_UBspline_3d_c* restrict spline = static_cast<multi_UBspline_3d_c*>(malloc(sizeof(multi_UBspline_3d_c)));
0582   if (!spline) {
0583     fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_3d_c.\n");
0584     abort();
0585   }
0586   spline->spcode = MULTI_U3D;
0587   spline->tcode  = SINGLE_COMPLEX;
0588   spline->xBC = xBC; 
0589   spline->yBC = yBC; 
0590   spline->zBC = zBC; 
0591   spline->num_splines = num_splines;
0592 
0593   // Setup internal variables
0594   int Mx = x_grid.num;  int My = y_grid.num; int Mz = z_grid.num;
0595   int Nx, Ny, Nz;
0596 
0597   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     
0598     Nx = Mx+3;
0599   else                           
0600     Nx = Mx+2;
0601   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
0602   x_grid.delta_inv = 1.0/x_grid.delta;
0603   spline->x_grid   = x_grid;
0604 
0605   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     
0606     Ny = My+3;
0607   else                           
0608     Ny = My+2;
0609   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
0610   y_grid.delta_inv = 1.0/y_grid.delta;
0611   spline->y_grid   = y_grid;
0612 
0613   if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC)     
0614     Nz = Mz+3;
0615   else                           
0616     Nz = Mz+2;
0617   z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
0618   z_grid.delta_inv = 1.0/z_grid.delta;
0619   spline->z_grid   = z_grid;
0620 
0621   int N = spline->num_splines;
0622 #ifdef HAVE_SSE
0623   if (N % 2)
0624     N++;
0625 #endif
0626 
0627   spline->x_stride = Ny*Nz*N;
0628   spline->y_stride = Nz*N;
0629   spline->z_stride = N;
0630 
0631 #ifndef HAVE_POSIX_MEMALIGN
0632   spline->coefs = (complex_float*)malloc (2*sizeof(float)*Nx*Ny*Nz*N);
0633   spline->lapl3 = (complex_float*)malloc (6*sizeof(float)*N);
0634 #else
0635   posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(float)*Nx*Ny*Nz*N);
0636   posix_memalign ((void**)&spline->lapl3, 64, 6*sizeof(float)*N);  
0637 #endif
0638 #ifdef HAVE_SSE
0639   init_sse_data();
0640 #endif
0641   if (!spline->coefs || !spline->lapl3) {
0642     fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_3d_c.\n");
0643     abort();
0644   }
0645 
0646   return spline;
0647 }
0648 
0649 void
0650 set_multi_UBspline_3d_c (multi_UBspline_3d_c* spline, int num, complex_float *data)
0651 {
0652   // Setup internal variables
0653   int Mx = spline->x_grid.num;
0654   int My = spline->y_grid.num;
0655   int Mz = spline->z_grid.num;
0656   int Nx, Ny, Nz;
0657 
0658   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     
0659     Nx = Mx+3;
0660   else                           
0661     Nx = Mx+2;
0662   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     
0663     Ny = My+3;
0664   else                           
0665     Ny = My+2;
0666   if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC)     
0667     Nz = Mz+3;
0668   else                          
0669     Nz = Mz+2;
0670 
0671   BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
0672   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
0673   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
0674   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
0675   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
0676   yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
0677   yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
0678   yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
0679   yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
0680   zBC_r.lCode = spline->zBC.lCode;  zBC_r.rCode = spline->zBC.rCode;
0681   zBC_r.lVal  = spline->zBC.lVal_r; zBC_r.rVal  = spline->zBC.rVal_r;
0682   zBC_i.lCode = spline->zBC.lCode;  zBC_i.rCode = spline->zBC.rCode;
0683   zBC_i.lVal  = spline->zBC.lVal_i; zBC_i.rVal  = spline->zBC.rVal_i;
0684 
0685   complex_float *coefs = spline->coefs + num;
0686   int zs = spline->z_stride;
0687   // First, solve in the X-direction 
0688   for (int iy=0; iy<My; iy++) 
0689     for (int iz=0; iz<Mz; iz++) {
0690       intptr_t doffset = 2*(iy*Mz+iz);
0691       intptr_t coffset = 2*(iy*Nz+iz)*zs;
0692       // Real part
0693       find_coefs_1d_s (spline->x_grid, xBC_r, 
0694                ((float*)data)+doffset,  (intptr_t)2*My*Mz,
0695                ((float*)coefs)+coffset, (intptr_t)2*Ny*Nz*zs);
0696       // Imag part
0697       find_coefs_1d_s (spline->x_grid, xBC_i, 
0698                ((float*)data)+doffset+1,  (intptr_t)2*My*Mz,
0699                ((float*)coefs)+coffset+1, (intptr_t)2*Ny*Nz*zs);
0700     }
0701   
0702   // Now, solve in the Y-direction
0703   for (int ix=0; ix<Nx; ix++) 
0704     for (int iz=0; iz<Nz; iz++) {
0705       intptr_t doffset = 2*(ix*Ny*Nz + iz)*zs;
0706       intptr_t coffset = 2*(ix*Ny*Nz + iz)*zs;
0707       // Real part
0708       find_coefs_1d_s (spline->y_grid, yBC_r, 
0709                ((float*)coefs)+doffset, (intptr_t)2*Nz*zs, 
0710                ((float*)coefs)+coffset, (intptr_t)2*Nz*zs);
0711       // Imag part
0712       find_coefs_1d_s (spline->y_grid, yBC_i, 
0713                ((float*)coefs)+doffset+1, (intptr_t)2*Nz*zs, 
0714                ((float*)coefs)+coffset+1, (intptr_t)2*Nz*zs);
0715     }
0716 
0717   // Now, solve in the Z-direction
0718   for (int ix=0; ix<Nx; ix++) 
0719     for (int iy=0; iy<Ny; iy++) {
0720       intptr_t doffset = 2*((ix*Ny+iy)*Nz)*zs;
0721       intptr_t coffset = 2*((ix*Ny+iy)*Nz)*zs;
0722       // Real part
0723       find_coefs_1d_s (spline->z_grid, zBC_r, 
0724                ((float*)coefs)+doffset, (intptr_t)2*zs, 
0725                ((float*)coefs)+coffset, (intptr_t)2*zs);
0726       // Imag part
0727       find_coefs_1d_s (spline->z_grid, zBC_i, 
0728                ((float*)coefs)+doffset+1, (intptr_t)2*zs, 
0729                ((float*)coefs)+coffset+1, (intptr_t)2*zs);
0730     }
0731 }
0732 
0733 
0734 ////////////////////////////////////////////////////////////
0735 ////////////////////////////////////////////////////////////
0736 ////     Double-Precision, Real Creation Routines       ////
0737 ////////////////////////////////////////////////////////////
0738 ////////////////////////////////////////////////////////////
0739 
0740 // On input, bands should be filled with:
0741 // row 0   :  abcdInitial from boundary conditions
0742 // rows 1:M:  basis functions in first 3 cols, data in last
0743 // row M+1 :  abcdFinal   from boundary conditions
0744 // cstride gives the stride between values in coefs.
0745 // On exit, coefs with contain interpolating B-spline coefs
0746 void 
0747 solve_deriv_interp_1d_d (double bands[], double coefs[],
0748              int M, int cstride);
0749 
0750 // On input, bands should be filled with:
0751 // row 0   :  abcdInitial from boundary conditions
0752 // rows 1:M:  basis functions in first 3 cols, data in last
0753 // row M+1 :  abcdFinal   from boundary conditions
0754 // cstride gives the stride between values in coefs.
0755 // On exit, coefs with contain interpolating B-spline coefs
0756 void 
0757 solve_periodic_interp_1d_d (double bands[], double coefs[],
0758                 int M, int cstride);
0759 
0760 void
0761 find_coefs_1d_d (Ugrid grid, BCtype_d bc, 
0762          double *data,  intptr_t dstride,
0763          double *coefs, intptr_t cstride);
0764 
0765 multi_UBspline_1d_d*
0766 create_multi_UBspline_1d_d (Ugrid x_grid, BCtype_d xBC, int num_splines)
0767 {
0768   // Create new spline
0769   multi_UBspline_1d_d* restrict spline = static_cast<multi_UBspline_1d_d*>(malloc(sizeof(multi_UBspline_1d_d)));
0770   if (!spline) {
0771     fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_1d_d.\n");
0772     abort();
0773   }
0774   spline->spcode = MULTI_U1D;
0775   spline->tcode  = DOUBLE_REAL;
0776   spline->xBC = xBC; 
0777   spline->num_splines = num_splines;
0778 
0779   // Setup internal variables
0780   int Mx = x_grid.num;
0781   int Nx;
0782 
0783   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
0784     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num);
0785     Nx = Mx+3;
0786   }
0787   else {
0788     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
0789     Nx = Mx+2;
0790   }
0791 
0792   x_grid.delta_inv = 1.0/x_grid.delta;
0793   spline->x_grid   = x_grid;
0794 
0795   int N = num_splines;
0796 #ifdef HAVE_SSE2
0797   // We must pad to keep data aligned for SSE operations
0798   if (N & 1)
0799     N++;
0800 #endif
0801   spline->x_stride = N;
0802 
0803 #ifndef HAVE_POSIX_MEMALIGN
0804   spline->coefs = (double*)malloc (sizeof(double)*Nx*N);
0805 
0806 #else
0807   posix_memalign ((void**)&spline->coefs, 64, sizeof(double)*Nx*N);
0808 #endif
0809 #ifdef HAVE_SSE2
0810   init_sse_data();
0811 #endif
0812   if (!spline->coefs) {
0813     fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_1d_d.\n");
0814     abort();
0815   }
0816 
0817   return spline;
0818 }
0819 
0820 void
0821 set_multi_UBspline_1d_d (multi_UBspline_1d_d* spline, int num, double *data)
0822 {
0823   double *coefs = spline->coefs + num;
0824   int xs = spline->x_stride;
0825   find_coefs_1d_d (spline->x_grid, spline->xBC, data, 1, coefs, xs);
0826 }
0827 
0828 void
0829 set_multi_UBspline_1d_d_BC (multi_UBspline_1d_d* spline, int num, double *data,
0830                 BCtype_d xBC)
0831 {
0832   double *coefs = spline->coefs + num;
0833   int xs = spline->x_stride;
0834   find_coefs_1d_d (spline->x_grid, xBC, data, 1, coefs, xs);
0835 }
0836 
0837 
0838 multi_UBspline_2d_d*
0839 create_multi_UBspline_2d_d (Ugrid x_grid, Ugrid y_grid,
0840                 BCtype_d xBC, BCtype_d yBC, int num_splines)
0841 {
0842   // Create new spline
0843   multi_UBspline_2d_d* restrict spline = static_cast<multi_UBspline_2d_d*>(malloc(sizeof(multi_UBspline_2d_d)));
0844   if (!spline) {
0845     fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_2d_d.\n");
0846     abort();
0847   }
0848   spline->spcode = MULTI_U2D;
0849   spline->tcode  = DOUBLE_REAL;
0850   spline->xBC = xBC; 
0851   spline->yBC = yBC; 
0852   spline->num_splines = num_splines;
0853  
0854   // Setup internal variables
0855   int Mx = x_grid.num;
0856   int My = y_grid.num;
0857   int Nx, Ny;
0858 
0859   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     
0860     Nx = Mx+3;
0861   else                           
0862     Nx = Mx+2;
0863   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
0864   x_grid.delta_inv = 1.0/x_grid.delta;
0865   spline->x_grid   = x_grid;
0866 
0867   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     
0868     Ny = My+3;
0869   else                           
0870     Ny = My+2;
0871   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
0872   y_grid.delta_inv = 1.0/y_grid.delta;
0873   spline->y_grid   = y_grid;
0874 
0875   int N = num_splines;
0876 
0877 #ifdef HAVE_SSE2
0878   // We must pad to keep data align for SSE operations
0879   if (num_splines & 1)
0880     N++;
0881 #endif
0882   spline->x_stride = Ny*N;
0883   spline->y_stride = N;
0884 
0885 #ifndef HAVE_POSIX_MEMALIGN
0886   spline->coefs = (double*)malloc (sizeof(double)*Nx*Ny*N);
0887 #else
0888   posix_memalign ((void**)&spline->coefs, 64, (sizeof(double)*Nx*Ny*N));
0889 #endif
0890 #ifdef HAVE_SSE2
0891   init_sse_data();
0892 #endif
0893   if (!spline->coefs) {
0894     fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_2d_d.\n");
0895     abort();
0896   }
0897 
0898   return spline;
0899 }
0900 
0901 void
0902 set_multi_UBspline_2d_d (multi_UBspline_2d_d* spline, int num, double *data)
0903 {
0904   int Mx = spline->x_grid.num;
0905   int My = spline->y_grid.num;
0906   int Nx, Ny;
0907   double *coefs = spline->coefs + num;
0908 
0909   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     
0910     Nx = Mx+3;
0911   else                                   
0912     Nx = Mx+2;
0913 
0914   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     
0915     Ny = My+3;
0916   else                                   
0917     Ny = My+2;
0918 
0919   int ys = spline->y_stride;
0920   // First, solve in the X-direction 
0921   for (int iy=0; iy<My; iy++) {
0922     intptr_t doffset = iy;
0923     intptr_t coffset = iy*ys;
0924     find_coefs_1d_d (spline->x_grid, spline->xBC, 
0925              data+doffset,  (intptr_t)My,
0926              coefs+coffset, (intptr_t)Ny*ys);
0927   }
0928   
0929   // Now, solve in the Y-direction
0930   for (int ix=0; ix<Nx; ix++) {
0931     intptr_t doffset = ix*Ny*ys;
0932     intptr_t coffset = ix*Ny*ys;
0933     find_coefs_1d_d (spline->y_grid, spline->yBC, 
0934              coefs+doffset, (intptr_t)ys, 
0935              coefs+coffset, (intptr_t)ys);
0936   }
0937 }
0938 
0939 
0940 multi_UBspline_3d_d*
0941 create_multi_UBspline_3d_d (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
0942                 BCtype_d xBC, BCtype_d yBC, BCtype_d zBC,
0943                 int num_splines)
0944 {
0945   // Create new spline
0946   multi_UBspline_3d_d* restrict spline;
0947 #ifdef HAVE_POSIX_MEMALIGN
0948   posix_memalign ((void**)&spline, 64, (size_t)sizeof(multi_UBspline_3d_d));
0949 #else
0950   spline = static_cast<multi_UBspline_3d_d*>(malloc(sizeof(multi_UBspline_3d_d)));
0951 #endif
0952   if (!spline) {
0953     fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_3d_d.\n");
0954     abort();
0955   }
0956   spline->spcode = MULTI_U3D;
0957   spline->tcode  = DOUBLE_REAL;
0958   spline->xBC = xBC; 
0959   spline->yBC = yBC; 
0960   spline->zBC = zBC; 
0961   spline->num_splines = num_splines;
0962 
0963   // Setup internal variables
0964   int Mx = x_grid.num;  int My = y_grid.num; int Mz = z_grid.num;
0965   int Nx, Ny, Nz;
0966 
0967   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     
0968     Nx = Mx+3;
0969   else                           
0970     Nx = Mx+2;
0971   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
0972   x_grid.delta_inv = 1.0/x_grid.delta;
0973   spline->x_grid   = x_grid;
0974 
0975   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     
0976     Ny = My+3;
0977   else                           
0978     Ny = My+2;
0979   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
0980   y_grid.delta_inv = 1.0/y_grid.delta;
0981   spline->y_grid   = y_grid;
0982 
0983   if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC)     
0984     Nz = Mz+3;
0985   else                           
0986     Nz = Mz+2;
0987   z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
0988   z_grid.delta_inv = 1.0/z_grid.delta;
0989   spline->z_grid   = z_grid;
0990 
0991 
0992   int N = num_splines;
0993 #if defined HAVE_SSE2 || defined HAVE_VSX
0994   // We must pad to keep data align for SSE operations
0995   if (N & 1)
0996     N++;
0997 #endif
0998   
0999   spline->x_stride = Ny*Nz*N;
1000   spline->y_stride = Nz*N;
1001   spline->z_stride = N;
1002   
1003 #ifdef HAVE_POSIX_MEMALIGN
1004   posix_memalign ((void**)&spline->coefs, 64, ((size_t)sizeof(double)*Nx*Ny*Nz*N));
1005 #else
1006   spline->coefs      = new double[Nx*Ny*Nz*N];
1007 #endif
1008 #ifdef HAVE_SSE2
1009   init_sse_data();
1010 #endif
1011   if (!spline->coefs) {
1012     fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_3d_d.\n");
1013     abort();
1014   }
1015 
1016   return spline;
1017 }
1018 
1019 void
1020 set_multi_UBspline_3d_d (multi_UBspline_3d_d* spline, int num, double *data)
1021 {
1022   int Mx = spline->x_grid.num;  
1023   int My = spline->y_grid.num; 
1024   int Mz = spline->z_grid.num;
1025   int Nx, Ny, Nz;
1026 
1027   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     
1028     Nx = Mx+3;
1029   else                                   
1030     Nx = Mx+2;
1031   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     
1032     Ny = My+3;
1033   else                                   
1034     Ny = My+2;
1035   if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC)     
1036     Nz = Mz+3;
1037   else                                   
1038     Nz = Mz+2;
1039 
1040   double *coefs = spline->coefs + num;
1041   intptr_t zs = spline->z_stride;
1042 
1043   // First, solve in the X-direction 
1044   for (int iy=0; iy<My; iy++) 
1045     for (int iz=0; iz<Mz; iz++) {
1046       intptr_t doffset = iy*Mz+iz;
1047       intptr_t coffset = (iy*Nz+iz)*zs;
1048       find_coefs_1d_d (spline->x_grid, spline->xBC, 
1049                data+doffset,  (intptr_t)My*Mz,
1050                coefs+coffset, (intptr_t)Ny*Nz*zs);
1051     }
1052   
1053   // Now, solve in the Y-direction
1054   for (int ix=0; ix<Nx; ix++) 
1055     for (int iz=0; iz<Nz; iz++) {
1056       intptr_t doffset = (ix*Ny*Nz + iz)*zs;
1057       intptr_t coffset = (ix*Ny*Nz + iz)*zs;
1058       find_coefs_1d_d (spline->y_grid, spline->yBC, 
1059                coefs+doffset, (intptr_t)Nz*zs, 
1060                coefs+coffset, (intptr_t)Nz*zs);
1061     }
1062 
1063   // Now, solve in the Z-direction
1064   for (int ix=0; ix<Nx; ix++) 
1065     for (int iy=0; iy<Ny; iy++) {
1066       intptr_t doffset = (ix*Ny+iy)*Nz*zs;
1067       intptr_t coffset = (ix*Ny+iy)*Nz*zs;
1068       find_coefs_1d_d (spline->z_grid, spline->zBC, 
1069                coefs+doffset, (intptr_t)zs, 
1070                coefs+coffset, (intptr_t)zs);
1071     }
1072 }
1073 
1074 
1075 ////////////////////////////////////////////////////////////
1076 ////////////////////////////////////////////////////////////
1077 ////    Double-Precision, Complex Creation Routines     ////
1078 ////////////////////////////////////////////////////////////
1079 ////////////////////////////////////////////////////////////
1080 
1081 // On input, bands should be filled with:
1082 // row 0   :  abcdInitial from boundary conditions
1083 // rows 1:M:  basis functions in first 3 cols, data in last
1084 // row M+1 :  abcdFinal   from boundary conditions
1085 // cstride gives the stride between values in coefs.
1086 // On exit, coefs with contain interpolating B-spline coefs
1087 
1088 
1089 multi_UBspline_1d_z*
1090 create_multi_UBspline_1d_z (Ugrid x_grid, BCtype_z xBC, int num_splines)
1091 {
1092   // Create new spline
1093   multi_UBspline_1d_z* restrict spline = static_cast<multi_UBspline_1d_z*>(malloc(sizeof(multi_UBspline_1d_z)));
1094   if (!spline) {
1095     fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_1d_z.\n");
1096     abort();
1097   }
1098   spline->spcode = MULTI_U1D;
1099   spline->tcode  = DOUBLE_COMPLEX;
1100   spline->xBC = xBC; 
1101   spline->num_splines = num_splines;
1102 
1103   // Setup internal variables
1104   int Mx = x_grid.num;
1105   int Nx;
1106 
1107   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
1108     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num);
1109     Nx = Mx+3;
1110   }
1111   else {
1112     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
1113     Nx = Mx+2;
1114   }
1115 
1116   x_grid.delta_inv = 1.0/x_grid.delta;
1117   spline->x_grid   = x_grid;
1118   spline->x_stride = num_splines;
1119 
1120 #ifndef HAVE_POSIX_MEMALIGN
1121   spline->coefs = (complex_double*)malloc (2*sizeof(double)*Nx*num_splines);
1122 #else
1123   posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(double)*Nx*num_splines);
1124 #endif
1125 #ifdef HAVE_SSE2
1126   init_sse_data();   
1127 #endif
1128    if (!spline->coefs) {
1129     fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_1d_z.\n");
1130     abort();
1131   }
1132 
1133   return spline;
1134 }
1135 
1136 void
1137 set_multi_UBspline_1d_z (multi_UBspline_1d_z* spline, int num, complex_double *data)
1138 {
1139 //  int Mx = spline->x_grid.num;
1140 // Set but not used
1141 //  int Nx;
1142 
1143   complex_double *coefs = spline->coefs + num;
1144 
1145 //  if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
1146 //    Nx = Mx+3;
1147 //  else
1148 //    Nx = Mx+2;
1149 
1150   BCtype_d xBC_r, xBC_i;
1151   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
1152   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
1153   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
1154   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
1155   int xs = spline->x_stride;
1156   // Real part
1157   find_coefs_1d_d (spline->x_grid, xBC_r, 
1158            (double*)data,      (intptr_t)2, 
1159            ((double*)coefs),   (intptr_t)2*xs);
1160   // Imaginary part
1161   find_coefs_1d_d (spline->x_grid, xBC_i, 
1162            ((double*)data)+1,  (intptr_t)2, 
1163            ((double*)coefs)+1, (intptr_t)2*xs);
1164  
1165 }
1166 
1167 void
1168 set_multi_UBspline_1d_z_BC (multi_UBspline_1d_z *spline, int num, 
1169                 complex_double *data, BCtype_z xBC)
1170 {
1171 //  int Mx = spline->x_grid.num;
1172 //  int Nx;
1173 
1174   complex_double *coefs = spline->coefs + num;
1175 
1176 //  if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)
1177 //    Nx = Mx+3;
1178 //  else
1179 //    Nx = Mx+2;
1180 
1181   BCtype_d xBC_r, xBC_i;
1182   xBC_r.lCode = xBC.lCode;  xBC_r.rCode = xBC.rCode;
1183   xBC_r.lVal  = xBC.lVal_r; xBC_r.rVal  = xBC.rVal_r;
1184   xBC_i.lCode = xBC.lCode;  xBC_i.rCode = xBC.rCode;
1185   xBC_i.lVal  = xBC.lVal_i; xBC_i.rVal  = xBC.rVal_i;
1186   int xs = spline->x_stride;
1187   // Real part
1188   find_coefs_1d_d (spline->x_grid, xBC_r, 
1189            (double*)data,    (intptr_t)2, 
1190            ((double*)coefs), (intptr_t)2*xs);
1191   // Imaginary part
1192   find_coefs_1d_d (spline->x_grid, xBC_i, 
1193            ((double*)data)+1,  (intptr_t)2, 
1194            ((double*)coefs)+1, (intptr_t)2*xs);
1195 }
1196 
1197 
1198 multi_UBspline_2d_z*
1199 create_multi_UBspline_2d_z (Ugrid x_grid, Ugrid y_grid,
1200               BCtype_z xBC, BCtype_z yBC, int num_splines)
1201 {
1202   // Create new spline
1203   multi_UBspline_2d_z* restrict spline = static_cast<multi_UBspline_2d_z*>(malloc(sizeof(multi_UBspline_2d_z)));
1204   if (!spline) {
1205     fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_2d_z.\n");
1206     abort();
1207   }
1208   spline->spcode = MULTI_U2D;
1209   spline->tcode  = DOUBLE_COMPLEX;
1210   spline->xBC = xBC; 
1211   spline->yBC = yBC;
1212   spline->num_splines = num_splines;
1213 
1214   // Setup internal variables
1215   int Mx = x_grid.num;
1216   int My = y_grid.num;
1217   int Nx, Ny;
1218 
1219   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     
1220     Nx = Mx+3;
1221   else                           
1222     Nx = Mx+2;
1223   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
1224   x_grid.delta_inv = 1.0/x_grid.delta;
1225   spline->x_grid   = x_grid;
1226 
1227   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     
1228     Ny = My+3;
1229   else                           
1230     Ny = My+2;
1231   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
1232   y_grid.delta_inv = 1.0/y_grid.delta;
1233   spline->y_grid   = y_grid;
1234   spline->x_stride = Ny*num_splines;
1235   spline->y_stride = num_splines;
1236 
1237 #ifndef HAVE_POSIX_MEMALIGN
1238   spline->coefs = (complex_double*)malloc (2*sizeof(double)*Nx*Ny*num_splines);
1239   spline->lapl2 = (complex_double*)malloc (4*sizeof(double)*num_splines);
1240 #else
1241   posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(double)*Nx*Ny*num_splines);
1242   posix_memalign ((void**)&spline->lapl2, 64, 4*sizeof(double)*num_splines);
1243 #endif
1244 #ifdef HAVE_SSE2
1245   init_sse_data();
1246 #endif
1247   if (!spline->coefs || !spline->lapl2) {
1248     fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_2d_z.\n");
1249     abort();
1250   }
1251 
1252   return spline;
1253 }
1254 
1255 
1256 void
1257 set_multi_UBspline_2d_z (multi_UBspline_2d_z* spline, int num,
1258              complex_double *data)
1259 {
1260   int Mx = spline->x_grid.num;
1261   int My = spline->y_grid.num;
1262   int Nx, Ny;
1263 
1264   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     
1265     Nx = Mx+3;
1266   else                           
1267     Nx = Mx+2;
1268   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     
1269     Ny = My+3;
1270   else                           
1271     Ny = My+2;
1272 
1273   BCtype_d xBC_r, xBC_i, yBC_r, yBC_i;
1274   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
1275   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
1276   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
1277   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
1278   yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
1279   yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
1280   yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
1281   yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
1282 
1283   complex_double *coefs = spline->coefs + num;
1284   int ys = spline->y_stride;
1285 
1286   // First, solve in the X-direction 
1287   for (int iy=0; iy<My; iy++) {
1288     intptr_t doffset = 2*iy;
1289     intptr_t coffset = 2*iy*ys;
1290     // Real part
1291     find_coefs_1d_d (spline->x_grid, xBC_r, 
1292              ((double*)data+doffset), (intptr_t)2*My, 
1293              (double*)coefs+coffset,  (intptr_t)2*Ny*ys);
1294     // Imag part
1295     find_coefs_1d_d (spline->x_grid, xBC_i, 
1296              ((double*)data)+doffset+1,  (intptr_t)2*My, 
1297              ((double*)coefs)+coffset+1, (intptr_t)2*Ny*ys);
1298   }
1299   
1300   // Now, solve in the Y-direction
1301   for (int ix=0; ix<Nx; ix++) {
1302     intptr_t doffset = 2*ix*Ny*ys;
1303     intptr_t coffset = 2*ix*Ny*ys;
1304     // Real part
1305     find_coefs_1d_d (spline->y_grid, yBC_r, 
1306              ((double*)coefs)+doffset, (intptr_t)2*ys, 
1307              (double*)coefs+coffset,   (intptr_t)2*ys);
1308     // Imag part
1309     find_coefs_1d_d (spline->y_grid, yBC_i, 
1310              (double*)coefs+doffset+1,   (intptr_t)2*ys, 
1311              ((double*)coefs)+coffset+1, (intptr_t)2*ys);
1312   }
1313 }
1314 
1315 
1316 
1317 multi_UBspline_3d_z*
1318 create_multi_UBspline_3d_z (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
1319                 BCtype_z xBC, BCtype_z yBC, BCtype_z zBC,
1320                 int num_splines)
1321 {
1322   // Create new spline
1323   multi_UBspline_3d_z* restrict spline = static_cast<multi_UBspline_3d_z*>(malloc(sizeof(multi_UBspline_3d_z)));
1324   if (!spline) {
1325     fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_3d_z.\n");
1326     abort();
1327   }
1328   spline->spcode = MULTI_U3D;
1329   spline->tcode  = DOUBLE_COMPLEX;
1330   spline->xBC = xBC; 
1331   spline->yBC = yBC; 
1332   spline->zBC = zBC;
1333   spline->num_splines = num_splines;
1334 
1335   // Setup internal variables
1336   int Mx = x_grid.num;  int My = y_grid.num; int Mz = z_grid.num;
1337   int Nx, Ny, Nz;
1338 
1339   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     
1340     Nx = Mx+3;
1341   else                           
1342     Nx = Mx+2;
1343   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
1344   x_grid.delta_inv = 1.0/x_grid.delta;
1345   spline->x_grid   = x_grid;
1346 
1347   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     
1348     Ny = My+3;
1349   else                           
1350     Ny = My+2;
1351   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
1352   y_grid.delta_inv = 1.0/y_grid.delta;
1353   spline->y_grid   = y_grid;
1354 
1355   if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC)     
1356     Nz = Mz+3;
1357   else                           
1358     Nz = Mz+2;
1359   z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
1360   z_grid.delta_inv = 1.0/z_grid.delta;
1361   spline->z_grid   = z_grid;
1362 
1363   int N = num_splines;
1364 #ifdef HAVE_SSE2
1365   if (N & 3)
1366     N += 4-(N & 3);
1367 #endif
1368 
1369   spline->x_stride = (intptr_t)Ny*(intptr_t)Nz*N;
1370   spline->y_stride = Nz*N;
1371   spline->z_stride = N;
1372 
1373 #ifndef HAVE_POSIX_MEMALIGN
1374   spline->coefs      = new complex_double[Nx*Ny*Nz*N];
1375   spline->lapl3 = new complex_double[3*N];
1376 #else
1377   posix_memalign ((void**)&spline->coefs, 64, (size_t)2*sizeof(double)*Nx*Ny*Nz*N);
1378   posix_memalign ((void**)&spline->lapl3, 64, 6*sizeof(double)*N);
1379 #endif
1380 
1381 #ifdef HAVE_SSE2
1382   init_sse_data();
1383 #endif
1384   if (!spline->coefs || !spline->lapl3) {
1385     fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_3d_z.\n");
1386     abort();
1387   }
1388 
1389   return spline;
1390 }
1391 
1392 void
1393 set_multi_UBspline_3d_z (multi_UBspline_3d_z* spline, int num, complex_double *data)
1394 {
1395   // Setup internal variables
1396   int Mx = spline->x_grid.num;  
1397   int My = spline->y_grid.num; 
1398   int Mz = spline->z_grid.num;
1399   int Nx, Ny, Nz;
1400 
1401   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     
1402     Nx = Mx+3;
1403   else                                   
1404     Nx = Mx+2;
1405   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     
1406     Ny = My+3;
1407   else                                   
1408     Ny = My+2;
1409   if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC)     
1410     Nz = Mz+3;
1411   else                                   
1412     Nz = Mz+2;
1413 
1414   BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
1415   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
1416   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
1417   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
1418   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
1419   yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
1420   yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
1421   yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
1422   yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
1423   zBC_r.lCode = spline->zBC.lCode;  zBC_r.rCode = spline->zBC.rCode;
1424   zBC_r.lVal  = spline->zBC.lVal_r; zBC_r.rVal  = spline->zBC.rVal_r;
1425   zBC_i.lCode = spline->zBC.lCode;  zBC_i.rCode = spline->zBC.rCode;
1426   zBC_i.lVal  = spline->zBC.lVal_i; zBC_i.rVal  = spline->zBC.rVal_i;
1427 
1428   complex_double *coefs = spline->coefs + num;
1429 
1430 // unused variable
1431 //  int N = spline->num_splines;
1432   int zs = spline->z_stride;
1433   // First, solve in the X-direction 
1434   for (int iy=0; iy<My; iy++) 
1435     for (int iz=0; iz<Mz; iz++) {
1436       intptr_t doffset = 2*(iy*Mz+iz);
1437       intptr_t coffset = 2*(iy*Nz+iz)*zs;
1438       // Real part
1439       find_coefs_1d_d (spline->x_grid, xBC_r, 
1440                ((double*)data)+doffset,  (intptr_t)2*My*Mz,
1441                ((double*)coefs)+coffset, (intptr_t)2*Ny*Nz*zs);
1442       // Imag part
1443       find_coefs_1d_d (spline->x_grid, xBC_i, 
1444                ((double*)data)+doffset+1,  (intptr_t)2*My*Mz,
1445                ((double*)coefs)+coffset+1, (intptr_t)2*Ny*Nz*zs);
1446     }
1447   
1448   // Now, solve in the Y-direction
1449   for (int ix=0; ix<Nx; ix++) 
1450     for (int iz=0; iz<Nz; iz++) {
1451       intptr_t doffset = 2*(ix*Ny*Nz + iz)*zs;
1452       intptr_t coffset = 2*(ix*Ny*Nz + iz)*zs;
1453       // Real part
1454       find_coefs_1d_d (spline->y_grid, yBC_r, 
1455                ((double*)coefs)+doffset, (intptr_t)2*Nz*zs, 
1456                ((double*)coefs)+coffset, (intptr_t)2*Nz*zs);
1457       // Imag part
1458       find_coefs_1d_d (spline->y_grid, yBC_i, 
1459                ((double*)coefs)+doffset+1, (intptr_t)2*Nz*zs, 
1460                ((double*)coefs)+coffset+1, (intptr_t)2*Nz*zs);
1461     }
1462 
1463   // Now, solve in the Z-direction
1464   for (int ix=0; ix<Nx; ix++) 
1465     for (int iy=0; iy<Ny; iy++) {
1466       intptr_t doffset = 2*((ix*Ny+iy)*Nz)*zs;
1467       intptr_t coffset = 2*((ix*Ny+iy)*Nz)*zs;
1468       // Real part
1469       find_coefs_1d_d (spline->z_grid, zBC_r, 
1470                ((double*)coefs)+doffset, (intptr_t)2*zs, 
1471                ((double*)coefs)+coffset, (intptr_t)2*zs);
1472       // Imag part
1473       find_coefs_1d_d (spline->z_grid, zBC_i, 
1474                ((double*)coefs)+doffset+1, (intptr_t)2*zs, 
1475                ((double*)coefs)+coffset+1, (intptr_t)2*zs);
1476     }
1477 }
1478 
1479 
1480 void
1481 destroy_multi_UBspline (Bspline *spline)
1482 {
1483   free(spline->coefs);
1484   free(spline);
1485 }