File indexing completed on 2024-12-22 04:09:37
0001 ///////////////////////////////////////////////////////////////////////////// 0002 // einspline: a library for creating and evaluating B-splines // 0003 // Copyright (C) 2007 Kenneth P. Esler, Jr. // 0004 // // 0005 // This program is free software; you can redistribute it and/or modify // 0006 // it under the terms of the GNU General Public License as published by // 0007 // the Free Software Foundation; either version 2 of the License, or // 0008 // (at your option) any later version. // 0009 // // 0010 // This program is distributed in the hope that it will be useful, // 0011 // but WITHOUT ANY WARRANTY; without even the implied warranty of // 0012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // 0013 // GNU General Public License for more details. // 0014 // // 0015 // You should have received a copy of the GNU General Public License // 0016 // along with this program; if not, write to the Free Software // 0017 // Foundation, Inc., 51 Franklin Street, Fifth Floor, // 0018 // Boston, MA 02110-1301 USA // 0019 ///////////////////////////////////////////////////////////////////////////// 0020 0021 #include "bspline_create.h" 0022 #ifndef _XOPEN_SOURCE 0023 #define _XOPEN_SOURCE 600 0024 #endif 0025 #ifndef __USE_XOPEN2K 0026 #define __USE_XOPEN2K 0027 #endif 0028 #include <stdlib.h> 0029 #include <stdio.h> 0030 #include <vector> 0031 0032 int posix_memalign(void **memptr, size_t alignment, size_t size); 0033 0034 //////////////////////////////////////////////////////////// 0035 //////////////////////////////////////////////////////////// 0036 //// Helper functions for spline creation //// 0037 //////////////////////////////////////////////////////////// 0038 //////////////////////////////////////////////////////////// 0039 void init_sse_data(); 0040 0041 void 0042 find_coefs_1d_d (Ugrid grid, BCtype_d bc, 0043 double *data, intptr_t dstride, 0044 double *coefs, intptr_t cstride); 0045 0046 void 0047 solve_deriv_interp_1d_s (float bands[], float coefs[], 0048 int M, int cstride) 0049 { 0050 // Solve interpolating equations 0051 // First and last rows are different 0052 bands[4*(0)+1] /= bands[4*(0)+0]; 0053 bands[4*(0)+2] /= bands[4*(0)+0]; 0054 bands[4*(0)+3] /= bands[4*(0)+0]; 0055 bands[4*(0)+0] = 1.0; 0056 bands[4*(1)+1] -= bands[4*(1)+0]*bands[4*(0)+1]; 0057 bands[4*(1)+2] -= bands[4*(1)+0]*bands[4*(0)+2]; 0058 bands[4*(1)+3] -= bands[4*(1)+0]*bands[4*(0)+3]; 0059 bands[4*(0)+0] = 0.0; 0060 bands[4*(1)+2] /= bands[4*(1)+1]; 0061 bands[4*(1)+3] /= bands[4*(1)+1]; 0062 bands[4*(1)+1] = 1.0; 0063 0064 // Now do rows 2 through M+1 0065 for (int row=2; row < (M+1); row++) { 0066 bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2]; 0067 bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3]; 0068 bands[4*(row)+2] /= bands[4*(row)+1]; 0069 bands[4*(row)+3] /= bands[4*(row)+1]; 0070 bands[4*(row)+0] = 0.0; 0071 bands[4*(row)+1] = 1.0; 0072 } 0073 0074 // Do last row 0075 bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2]; 0076 bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3]; 0077 bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2]; 0078 bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3]; 0079 bands[4*(M+1)+3] /= bands[4*(M+1)+2]; 0080 bands[4*(M+1)+2] = 1.0; 0081 0082 coefs[(M+1)*cstride] = bands[4*(M+1)+3]; 0083 // Now back substitute up 0084 for (int row=M; row>0; row--) 0085 coefs[row*cstride] = bands[4*(row)+3] - bands[4*(row)+2]*coefs[cstride*(row+1)]; 0086 0087 // Finish with first row 0088 coefs[0] = bands[4*(0)+3] - bands[4*(0)+1]*coefs[1*cstride] - bands[4*(0)+2]*coefs[2*cstride]; 0089 } 0090 0091 // On input, bands should be filled with: 0092 // row 0 : abcdInitial from boundary conditions 0093 // rows 1:M: basis functions in first 3 cols, data in last 0094 // row M+1 : abcdFinal from boundary conditions 0095 // cstride gives the stride between values in coefs. 0096 // On exit, coefs with contain interpolating B-spline coefs 0097 void 0098 solve_periodic_interp_1d_s (float bands[], float coefs[], 0099 int M, int cstride) 0100 { 0101 std::vector<float> lastCol(M); 0102 0103 // Now solve: 0104 // First and last rows are different 0105 bands[4*(0)+2] /= bands[4*(0)+1]; 0106 bands[4*(0)+0] /= bands[4*(0)+1]; 0107 bands[4*(0)+3] /= bands[4*(0)+1]; 0108 bands[4*(0)+1] = 1.0; 0109 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0]; 0110 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3]; 0111 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(0)+2]; 0112 lastCol[0] = bands[4*(0)+0]; 0113 0114 for (int row=1; row < (M-1); row++) { 0115 bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2]; 0116 bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3]; 0117 lastCol[row] = -bands[4*(row)+0] * lastCol[row-1]; 0118 bands[4*(row)+0] = 0.0; 0119 bands[4*(row)+2] /= bands[4*(row)+1]; 0120 bands[4*(row)+3] /= bands[4*(row)+1]; 0121 lastCol[row] /= bands[4*(row)+1]; 0122 bands[4*(row)+1] = 1.0; 0123 if (row < (M-2)) { 0124 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3]; 0125 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row]; 0126 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2]; 0127 } 0128 } 0129 0130 // Now do last row 0131 // The [2] element and [0] element are now on top of each other 0132 bands[4*(M-1)+0] += bands[4*(M-1)+2]; 0133 bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]); 0134 bands[4*(M-1)+3] -= bands[4*(M-1)+0] * bands[4*(M-2)+3]; 0135 bands[4*(M-1)+3] /= bands[4*(M-1)+1]; 0136 coefs[M*cstride] = bands[4*(M-1)+3]; 0137 for (int row=M-2; row>=0; row--) 0138 coefs[(row+1)*cstride] = 0139 bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride]; 0140 0141 coefs[0*cstride] = coefs[M*cstride]; 0142 coefs[(M+1)*cstride] = coefs[1*cstride]; 0143 coefs[(M+2)*cstride] = coefs[2*cstride]; 0144 0145 0146 } 0147 0148 0149 // On input, bands should be filled with: 0150 // row 0 : abcdInitial from boundary conditions 0151 // rows 1:M: basis functions in first 3 cols, data in last 0152 // row M+1 : abcdFinal from boundary conditions 0153 // cstride gives the stride between values in coefs. 0154 // On exit, coefs with contain interpolating B-spline coefs 0155 void 0156 solve_antiperiodic_interp_1d_s (float bands[], float coefs[], 0157 int M, int cstride) 0158 { 0159 bands[4*0+0] *= -1.0; 0160 bands[4*(M-1)+2] *= -1.0; 0161 0162 std::vector<float> lastCol(M); 0163 0164 // Now solve: 0165 // First and last rows are different 0166 bands[4*(0)+2] /= bands[4*(0)+1]; 0167 bands[4*(0)+0] /= bands[4*(0)+1]; 0168 bands[4*(0)+3] /= bands[4*(0)+1]; 0169 bands[4*(0)+1] = 1.0; 0170 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0]; 0171 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3]; 0172 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(0)+2]; 0173 lastCol[0] = bands[4*(0)+0]; 0174 0175 for (int row=1; row < (M-1); row++) { 0176 bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2]; 0177 bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3]; 0178 lastCol[row] = -bands[4*(row)+0] * lastCol[row-1]; 0179 bands[4*(row)+0] = 0.0; 0180 bands[4*(row)+2] /= bands[4*(row)+1]; 0181 bands[4*(row)+3] /= bands[4*(row)+1]; 0182 lastCol[row] /= bands[4*(row)+1]; 0183 bands[4*(row)+1] = 1.0; 0184 if (row < (M-2)) { 0185 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3]; 0186 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row]; 0187 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2]; 0188 } 0189 } 0190 0191 // Now do last row 0192 // The [2] element and [0] element are now on top of each other 0193 bands[4*(M-1)+0] += bands[4*(M-1)+2]; 0194 bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]); 0195 bands[4*(M-1)+3] -= bands[4*(M-1)+0] * bands[4*(M-2)+3]; 0196 bands[4*(M-1)+3] /= bands[4*(M-1)+1]; 0197 coefs[M*cstride] = bands[4*(M-1)+3]; 0198 for (int row=M-2; row>=0; row--) 0199 coefs[(row+1)*cstride] = 0200 bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride]; 0201 0202 coefs[0*cstride] = -coefs[M*cstride]; 0203 coefs[(M+1)*cstride] = -coefs[1*cstride]; 0204 coefs[(M+2)*cstride] = -coefs[2*cstride]; 0205 0206 0207 } 0208 0209 0210 0211 0212 #ifdef HIGH_PRECISION 0213 void 0214 find_coefs_1d_s (Ugrid grid, BCtype_s bc, 0215 float *data, intptr_t dstride, 0216 float *coefs, intptr_t cstride) 0217 { 0218 BCtype_d d_bc; 0219 double *d_data, *d_coefs; 0220 0221 d_bc.lCode = bc.lCode; d_bc.rCode = bc.rCode; 0222 d_bc.lVal = bc.lVal; d_bc.rVal = bc.rVal; 0223 int M = grid.num, N; 0224 if (bc.lCode == PERIODIC || bc.lCode == ANTIPERIODIC) N = M+3; 0225 else N = M+2; 0226 0227 d_data = new double[N]; 0228 d_coefs = new double[N]; 0229 for (int i=0; i<M; i++) 0230 d_data[i] = data[i*dstride]; 0231 find_coefs_1d_d (grid, d_bc, d_data, 1, d_coefs, 1); 0232 for (int i=0; i<N; i++) 0233 coefs[i*cstride] = d_coefs[i]; 0234 delete[] d_data; 0235 delete[] d_coefs; 0236 } 0237 0238 #else 0239 void 0240 find_coefs_1d_s (Ugrid grid, BCtype_s bc, 0241 float *data, intptr_t dstride, 0242 float *coefs, intptr_t cstride) 0243 { 0244 int M = grid.num; 0245 float basis[4] = {1.0/6.0, 2.0/3.0, 1.0/6.0, 0.0}; 0246 if (bc.lCode == PERIODIC || bc.lCode == ANTIPERIODIC) { 0247 0248 float *bands = new float[4*M]; 0249 0250 for (int i=0; i<M; i++) { 0251 bands[4*i+0] = basis[0]; 0252 bands[4*i+1] = basis[1]; 0253 bands[4*i+2] = basis[2]; 0254 bands[4*i+3] = data[i*dstride]; 0255 } 0256 if (bc.lCode == PERIODIC) 0257 solve_periodic_interp_1d_s (bands, coefs, M, cstride); 0258 else 0259 solve_antiperiodic_interp_1d_s (bands, coefs, M, cstride); 0260 0261 delete[] bands; 0262 } 0263 else { 0264 // Setup boundary conditions 0265 float abcd_left[4]; 0266 float abcd_right[4]; 0267 for (int i = 0; i < 4; i++) { 0268 abcd_left[i] = 0.0; 0269 abcd_right[i] = 0.0; 0270 } 0271 0272 // Left boundary 0273 if (bc.lCode == FLAT || bc.lCode == NATURAL) 0274 bc.lVal = 0.0; 0275 if (bc.lCode == FLAT || bc.lCode == DERIV1) { 0276 abcd_left[0] = -0.5 * grid.delta_inv; 0277 abcd_left[1] = 0.0 * grid.delta_inv; 0278 abcd_left[2] = 0.5 * grid.delta_inv; 0279 abcd_left[3] = bc.lVal; 0280 } 0281 if (bc.lCode == NATURAL || bc.lCode == DERIV2) { 0282 abcd_left[0] = 1.0 * grid.delta_inv * grid.delta_inv; 0283 abcd_left[1] =-2.0 * grid.delta_inv * grid.delta_inv; 0284 abcd_left[2] = 1.0 * grid.delta_inv * grid.delta_inv; 0285 abcd_left[3] = bc.lVal; 0286 } 0287 0288 // Right boundary 0289 if (bc.rCode == FLAT || bc.rCode == NATURAL) 0290 bc.rVal = 0.0; 0291 if (bc.rCode == FLAT || bc.rCode == DERIV1) { 0292 abcd_right[0] = -0.5 * grid.delta_inv; 0293 abcd_right[1] = 0.0 * grid.delta_inv; 0294 abcd_right[2] = 0.5 * grid.delta_inv; 0295 abcd_right[3] = bc.rVal; 0296 } 0297 if (bc.rCode == NATURAL || bc.rCode == DERIV2) { 0298 abcd_right[0] = 1.0 *grid.delta_inv * grid.delta_inv; 0299 abcd_right[1] =-2.0 *grid.delta_inv * grid.delta_inv; 0300 abcd_right[2] = 1.0 *grid.delta_inv * grid.delta_inv; 0301 abcd_right[3] = bc.rVal; 0302 } 0303 0304 float *bands = new float[4*(M+2)]; 0305 for (int i=0; i<4; i++) { 0306 bands[4*( 0 )+i] = abcd_left[i]; 0307 bands[4*(M+1)+i] = abcd_right[i]; 0308 } 0309 for (int i=0; i<M; i++) { 0310 for (int j=0; j<3; j++) 0311 bands[4*(i+1)+j] = basis[j]; 0312 bands[4*(i+1)+3] = data[i*dstride]; 0313 } 0314 // Now, solve for coefficients 0315 solve_deriv_interp_1d_s (bands, coefs, M, cstride); 0316 delete[] bands; 0317 } 0318 } 0319 0320 #endif 0321 0322 //////////////////////////////////////////////////////////// 0323 //////////////////////////////////////////////////////////// 0324 //// Single-Precision, Real Creation Routines //// 0325 //////////////////////////////////////////////////////////// 0326 //////////////////////////////////////////////////////////// 0327 0328 // On input, bands should be filled with: 0329 // row 0 : abcdInitial from boundary conditions 0330 // rows 1:M: basis functions in first 3 cols, data in last 0331 // row M+1 : abcdFinal from boundary conditions 0332 // cstride gives the stride between values in coefs. 0333 // On exit, coefs with contain interpolating B-spline coefs 0334 UBspline_1d_s* 0335 create_UBspline_1d_s (Ugrid x_grid, BCtype_s xBC, float *data) 0336 { 0337 // Create new spline 0338 UBspline_1d_s* restrict spline = static_cast<UBspline_1d_s*>(malloc(sizeof(UBspline_1d_s))); 0339 spline->spcode = U1D; 0340 spline->tcode = SINGLE_REAL; 0341 spline->xBC = xBC; spline->x_grid = x_grid; 0342 0343 // Setup internal variables 0344 int M = x_grid.num; 0345 int N; 0346 0347 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { 0348 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); 0349 N = M+3; 0350 } 0351 else { 0352 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); 0353 N = M+2; 0354 } 0355 0356 x_grid.delta_inv = 1.0/x_grid.delta; 0357 spline->x_grid = x_grid; 0358 #ifndef HAVE_SSE2 0359 spline->coefs = (float*)malloc (sizeof(float)*N); 0360 #else 0361 posix_memalign ((void**)&spline->coefs, 16, (sizeof(float)*N)); 0362 #endif 0363 find_coefs_1d_s (spline->x_grid, xBC, data, 1, spline->coefs, 1); 0364 0365 init_sse_data(); 0366 return spline; 0367 } 0368 0369 void 0370 recompute_UBspline_1d_s (UBspline_1d_s* spline, float *data) 0371 { 0372 find_coefs_1d_s (spline->x_grid, spline->xBC, data, 1, spline->coefs, 1); 0373 } 0374 0375 0376 UBspline_2d_s* 0377 create_UBspline_2d_s (Ugrid x_grid, Ugrid y_grid, 0378 BCtype_s xBC, BCtype_s yBC, float *data) 0379 { 0380 // Create new spline 0381 UBspline_2d_s* restrict spline = static_cast<UBspline_2d_s*>(malloc(sizeof(UBspline_2d_s))); 0382 spline->spcode = U2D; 0383 spline->tcode = SINGLE_REAL; 0384 spline->xBC = xBC; 0385 spline->yBC = yBC; 0386 // Setup internal variables 0387 int Mx = x_grid.num; 0388 int My = y_grid.num; 0389 int Nx, Ny; 0390 0391 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; 0392 else Nx = Mx+2; 0393 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 0394 x_grid.delta_inv = 1.0/x_grid.delta; 0395 spline->x_grid = x_grid; 0396 0397 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; 0398 else Ny = My+2; 0399 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 0400 y_grid.delta_inv = 1.0/y_grid.delta; 0401 spline->y_grid = y_grid; 0402 spline->x_stride = Ny; 0403 #ifndef HAVE_SSE2 0404 spline->coefs = (float*)malloc (sizeof(float)*Nx*Ny); 0405 #else 0406 posix_memalign ((void**)&spline->coefs, 16, sizeof(float)*Nx*Ny); 0407 #endif 0408 0409 // First, solve in the X-direction 0410 for (int iy=0; iy<My; iy++) { 0411 intptr_t doffset = iy; 0412 intptr_t coffset = iy; 0413 find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, My, 0414 spline->coefs+coffset, Ny); 0415 } 0416 0417 // Now, solve in the Y-direction 0418 for (int ix=0; ix<Nx; ix++) { 0419 intptr_t doffset = ix*Ny; 0420 intptr_t coffset = ix*Ny; 0421 find_coefs_1d_s (spline->y_grid, spline->yBC, spline->coefs+doffset, 1, 0422 spline->coefs+coffset, 1); 0423 } 0424 init_sse_data(); 0425 return spline; 0426 } 0427 0428 void 0429 recompute_UBspline_2d_s (UBspline_2d_s* spline, float *data) 0430 { 0431 int Mx = spline->x_grid.num; 0432 int My = spline->y_grid.num; 0433 int Nx, Ny; 0434 0435 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; 0436 else Nx = Mx+2; 0437 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; 0438 else Ny = My+2; 0439 0440 // First, solve in the X-direction 0441 for (int iy=0; iy<My; iy++) { 0442 intptr_t doffset = iy; 0443 intptr_t coffset = iy; 0444 find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, My, 0445 spline->coefs+coffset, Ny); 0446 } 0447 0448 // Now, solve in the Y-direction 0449 for (int ix=0; ix<Nx; ix++) { 0450 intptr_t doffset = ix*Ny; 0451 intptr_t coffset = ix*Ny; 0452 find_coefs_1d_s (spline->y_grid, spline->yBC, spline->coefs+doffset, 1, 0453 spline->coefs+coffset, 1); 0454 } 0455 } 0456 0457 0458 UBspline_3d_s* 0459 create_UBspline_3d_s (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, 0460 BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, 0461 float *data) 0462 { 0463 // Create new spline 0464 UBspline_3d_s* restrict spline = static_cast<UBspline_3d_s*>(malloc(sizeof(UBspline_3d_s))); 0465 spline->spcode = U3D; 0466 spline->tcode = SINGLE_REAL; 0467 spline->xBC = xBC; 0468 spline->yBC = yBC; 0469 spline->zBC = zBC; 0470 // Setup internal variables 0471 int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; 0472 int Nx, Ny, Nz; 0473 0474 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; 0475 else Nx = Mx+2; 0476 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 0477 x_grid.delta_inv = 1.0/x_grid.delta; 0478 spline->x_grid = x_grid; 0479 0480 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; 0481 else Ny = My+2; 0482 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 0483 y_grid.delta_inv = 1.0/y_grid.delta; 0484 spline->y_grid = y_grid; 0485 0486 if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3; 0487 else Nz = Mz+2; 0488 z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); 0489 z_grid.delta_inv = 1.0/z_grid.delta; 0490 spline->z_grid = z_grid; 0491 0492 spline->x_stride = Ny*Nz; 0493 spline->y_stride = Nz; 0494 0495 #ifndef HAVE_SSE2 0496 spline->coefs = new float[Nx*Ny*Nz]; 0497 #else 0498 posix_memalign ((void**)&spline->coefs, 16, (sizeof(float)*Nx*Ny*Nz)); 0499 #endif 0500 0501 // First, solve in the X-direction 0502 for (int iy=0; iy<My; iy++) 0503 for (int iz=0; iz<Mz; iz++) { 0504 intptr_t doffset = iy*Mz+iz; 0505 intptr_t coffset = iy*Nz+iz; 0506 find_coefs_1d_s (spline->x_grid, xBC, data+doffset, My*Mz, 0507 spline->coefs+coffset, Ny*Nz); 0508 } 0509 0510 // Now, solve in the Y-direction 0511 for (int ix=0; ix<Nx; ix++) 0512 for (int iz=0; iz<Nz; iz++) { 0513 intptr_t doffset = ix*Ny*Nz + iz; 0514 intptr_t coffset = ix*Ny*Nz + iz; 0515 find_coefs_1d_s (spline->y_grid, yBC, spline->coefs+doffset, Nz, 0516 spline->coefs+coffset, Nz); 0517 } 0518 0519 // Now, solve in the Z-direction 0520 for (int ix=0; ix<Nx; ix++) 0521 for (int iy=0; iy<Ny; iy++) { 0522 intptr_t doffset = (ix*Ny+iy)*Nz; 0523 intptr_t coffset = (ix*Ny+iy)*Nz; 0524 find_coefs_1d_s (spline->z_grid, zBC, spline->coefs+doffset, 1, 0525 spline->coefs+coffset, 1); 0526 } 0527 init_sse_data(); 0528 return spline; 0529 } 0530 0531 void 0532 recompute_UBspline_3d_s (UBspline_3d_s* spline, float *data) 0533 { 0534 int Mx = spline->x_grid.num; 0535 int My = spline->y_grid.num; 0536 int Mz = spline->z_grid.num; 0537 int Nx, Ny, Nz; 0538 0539 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; 0540 else Nx = Mx+2; 0541 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; 0542 else Ny = My+2; 0543 if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) Nz = Mz+3; 0544 else Nz = Mz+2; 0545 0546 // First, solve in the X-direction 0547 for (int iy=0; iy<My; iy++) 0548 for (int iz=0; iz<Mz; iz++) { 0549 intptr_t doffset = iy*Mz+iz; 0550 intptr_t coffset = iy*Nz+iz; 0551 find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, My*Mz, 0552 spline->coefs+coffset, Ny*Nz); 0553 } 0554 0555 // Now, solve in the Y-direction 0556 for (int ix=0; ix<Nx; ix++) 0557 for (int iz=0; iz<Nz; iz++) { 0558 intptr_t doffset = ix*Ny*Nz + iz; 0559 intptr_t coffset = ix*Ny*Nz + iz; 0560 find_coefs_1d_s (spline->y_grid, spline->yBC, spline->coefs+doffset, Nz, 0561 spline->coefs+coffset, Nz); 0562 } 0563 0564 // Now, solve in the Z-direction 0565 for (int ix=0; ix<Nx; ix++) 0566 for (int iy=0; iy<Ny; iy++) { 0567 intptr_t doffset = (ix*Ny+iy)*Nz; 0568 intptr_t coffset = (ix*Ny+iy)*Nz; 0569 find_coefs_1d_s (spline->z_grid, spline->zBC, spline->coefs+doffset, 1, 0570 spline->coefs+coffset, 1); 0571 } 0572 } 0573 0574 0575 //////////////////////////////////////////////////////////// 0576 //////////////////////////////////////////////////////////// 0577 //// Single-Precision, Complex Creation Routines //// 0578 //////////////////////////////////////////////////////////// 0579 //////////////////////////////////////////////////////////// 0580 0581 // On input, bands should be filled with: 0582 // row 0 : abcdInitial from boundary conditions 0583 // rows 1:M: basis functions in first 3 cols, data in last 0584 // row M+1 : abcdFinal from boundary conditions 0585 // cstride gives the stride between values in coefs. 0586 // On exit, coefs with contain interpolating B-spline coefs 0587 UBspline_1d_c* 0588 create_UBspline_1d_c (Ugrid x_grid, BCtype_c xBC, complex_float *data) 0589 { 0590 // Create new spline 0591 UBspline_1d_c* restrict spline = static_cast<UBspline_1d_c*>(malloc(sizeof(UBspline_1d_c))); 0592 spline->spcode = U1D; 0593 spline->tcode = SINGLE_COMPLEX; 0594 spline->xBC = xBC; 0595 // Setup internal variables 0596 int M = x_grid.num; 0597 int N; 0598 0599 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { 0600 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); 0601 N = M+3; 0602 } 0603 else { 0604 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); 0605 N = M+2; 0606 } 0607 0608 x_grid.delta_inv = 1.0/x_grid.delta; 0609 spline->x_grid = x_grid; 0610 #ifndef HAVE_SSE2 0611 spline->coefs = (complex_float*)malloc (2*sizeof(float)*N); 0612 #else 0613 posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*N); 0614 #endif 0615 0616 BCtype_s xBC_r, xBC_i; 0617 xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; 0618 xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; 0619 xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; 0620 xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; 0621 // Real part 0622 find_coefs_1d_s (spline->x_grid, xBC_r, 0623 (float*)data, 2, (float*)spline->coefs, 2); 0624 // Imaginarty part 0625 find_coefs_1d_s (spline->x_grid, xBC_i, 0626 ((float*)data)+1, 2, ((float*)spline->coefs+1), 2); 0627 0628 init_sse_data(); 0629 return spline; 0630 } 0631 0632 void 0633 recompute_UBspline_1d_c (UBspline_1d_c* spline, complex_float *data) 0634 { 0635 0636 BCtype_s xBC_r, xBC_i; 0637 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; 0638 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; 0639 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; 0640 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; 0641 0642 // Real part 0643 find_coefs_1d_s (spline->x_grid, xBC_r, 0644 (float*)data, 2, (float*)spline->coefs, 2); 0645 // Imaginarty part 0646 find_coefs_1d_s (spline->x_grid, xBC_i, 0647 ((float*)data)+1, 2, ((float*)spline->coefs+1), 2); 0648 } 0649 0650 0651 0652 UBspline_2d_c* 0653 create_UBspline_2d_c (Ugrid x_grid, Ugrid y_grid, 0654 BCtype_c xBC, BCtype_c yBC, complex_float *data) 0655 { 0656 // Create new spline 0657 UBspline_2d_c* restrict spline = static_cast<UBspline_2d_c*>(malloc(sizeof(UBspline_2d_c))); 0658 spline->spcode = U2D; 0659 spline->tcode = SINGLE_COMPLEX; 0660 spline->xBC = xBC; 0661 spline->yBC = yBC; 0662 0663 // Setup internal variables 0664 int Mx = x_grid.num; 0665 int My = y_grid.num; 0666 int Nx, Ny; 0667 0668 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; 0669 else Nx = Mx+2; 0670 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 0671 x_grid.delta_inv = 1.0/x_grid.delta; 0672 spline->x_grid = x_grid; 0673 0674 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; 0675 else Ny = My+2; 0676 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 0677 y_grid.delta_inv = 1.0/y_grid.delta; 0678 spline->y_grid = y_grid; 0679 spline->x_stride = Ny; 0680 0681 #ifndef HAVE_SSE2 0682 spline->coefs = (complex_float*)malloc (2*sizeof(float)*Nx*Ny); 0683 #else 0684 posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*Nx*Ny); 0685 #endif 0686 0687 BCtype_s xBC_r, xBC_i, yBC_r, yBC_i; 0688 xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; 0689 xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; 0690 xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; 0691 xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; 0692 yBC_r.lCode = yBC.lCode; yBC_r.rCode = yBC.rCode; 0693 yBC_r.lVal = yBC.lVal_r; yBC_r.rVal = yBC.rVal_r; 0694 yBC_i.lCode = yBC.lCode; yBC_i.rCode = yBC.rCode; 0695 yBC_i.lVal = yBC.lVal_i; yBC_i.rVal = yBC.rVal_i; 0696 // First, solve in the X-direction 0697 for (int iy=0; iy<My; iy++) { 0698 intptr_t doffset = 2*iy; 0699 intptr_t coffset = 2*iy; 0700 // Real part 0701 find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My, 0702 (float*)spline->coefs+coffset, 2*Ny); 0703 // Imag part 0704 find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My, 0705 ((float*)spline->coefs)+coffset+1, 2*Ny); 0706 } 0707 0708 // Now, solve in the Y-direction 0709 for (int ix=0; ix<Nx; ix++) { 0710 intptr_t doffset = 2*ix*Ny; 0711 intptr_t coffset = 2*ix*Ny; 0712 // Real part 0713 find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2, 0714 ((float*)spline->coefs)+coffset, 2); 0715 // Imag part 0716 find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2, 0717 ((float*)spline->coefs)+coffset+1, 2); 0718 } 0719 init_sse_data(); 0720 return spline; 0721 } 0722 0723 0724 void 0725 recompute_UBspline_2d_c (UBspline_2d_c* spline, complex_float *data) 0726 { 0727 // Setup internal variables 0728 int Mx = spline->x_grid.num; 0729 int My = spline->y_grid.num; 0730 int Nx, Ny; 0731 0732 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 0733 Nx = Mx+3; 0734 else 0735 Nx = Mx+2; 0736 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) 0737 Ny = My+3; 0738 else 0739 Ny = My+2; 0740 0741 BCtype_s xBC_r, xBC_i, yBC_r, yBC_i; 0742 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; 0743 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; 0744 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; 0745 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; 0746 yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; 0747 yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; 0748 yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; 0749 yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; 0750 0751 // First, solve in the X-direction 0752 for (int iy=0; iy<My; iy++) { 0753 intptr_t doffset = 2*iy; 0754 intptr_t coffset = 2*iy; 0755 // Real part 0756 find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My, 0757 (float*)spline->coefs+coffset, 2*Ny); 0758 // Imag part 0759 find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My, 0760 ((float*)spline->coefs)+coffset+1, 2*Ny); 0761 } 0762 0763 // Now, solve in the Y-direction 0764 for (int ix=0; ix<Nx; ix++) { 0765 intptr_t doffset = 2*ix*Ny; 0766 intptr_t coffset = 2*ix*Ny; 0767 // Real part 0768 find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2, 0769 ((float*)spline->coefs)+coffset, 2); 0770 // Imag part 0771 find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2, 0772 ((float*)spline->coefs)+coffset+1, 2); 0773 } 0774 } 0775 0776 UBspline_3d_c* 0777 create_UBspline_3d_c (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, 0778 BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, 0779 complex_float *data) 0780 { 0781 // Create new spline 0782 UBspline_3d_c* restrict spline = static_cast<UBspline_3d_c*>(malloc(sizeof(UBspline_3d_c))); 0783 spline->spcode = U3D; 0784 spline->tcode = SINGLE_COMPLEX; 0785 spline->xBC = xBC; 0786 spline->yBC = yBC; 0787 spline->zBC = zBC; 0788 0789 // Setup internal variables 0790 int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; 0791 int Nx, Ny, Nz; 0792 0793 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; 0794 else Nx = Mx+2; 0795 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 0796 x_grid.delta_inv = 1.0/x_grid.delta; 0797 spline->x_grid = x_grid; 0798 0799 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; 0800 else Ny = My+2; 0801 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 0802 y_grid.delta_inv = 1.0/y_grid.delta; 0803 spline->y_grid = y_grid; 0804 0805 if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3; 0806 else Nz = Mz+2; 0807 z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); 0808 z_grid.delta_inv = 1.0/z_grid.delta; 0809 spline->z_grid = z_grid; 0810 0811 spline->x_stride = Ny*Nz; 0812 spline->y_stride = Nz; 0813 0814 #ifndef HAVE_SSE2 0815 spline->coefs = new complex_float[Nx*Ny*Nz]; 0816 #else 0817 posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*Nx*Ny*Nz); 0818 #endif 0819 0820 BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; 0821 xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; 0822 xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; 0823 xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; 0824 xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; 0825 yBC_r.lCode = yBC.lCode; yBC_r.rCode = yBC.rCode; 0826 yBC_r.lVal = yBC.lVal_r; yBC_r.rVal = yBC.rVal_r; 0827 yBC_i.lCode = yBC.lCode; yBC_i.rCode = yBC.rCode; 0828 yBC_i.lVal = yBC.lVal_i; yBC_i.rVal = yBC.rVal_i; 0829 zBC_r.lCode = zBC.lCode; zBC_r.rCode = zBC.rCode; 0830 zBC_r.lVal = zBC.lVal_r; zBC_r.rVal = zBC.rVal_r; 0831 zBC_i.lCode = zBC.lCode; zBC_i.rCode = zBC.rCode; 0832 zBC_i.lVal = zBC.lVal_i; zBC_i.rVal = zBC.rVal_i; 0833 // First, solve in the X-direction 0834 for (int iy=0; iy<My; iy++) 0835 for (int iz=0; iz<Mz; iz++) { 0836 intptr_t doffset = 2*(iy*Mz+iz); 0837 intptr_t coffset = 2*(iy*Nz+iz); 0838 // Real part 0839 find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My*Mz, 0840 ((float*)spline->coefs)+coffset, 2*Ny*Nz); 0841 // Imag part 0842 find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My*Mz, 0843 ((float*)spline->coefs)+coffset+1, 2*Ny*Nz); 0844 } 0845 0846 // Now, solve in the Y-direction 0847 for (int ix=0; ix<Nx; ix++) 0848 for (int iz=0; iz<Nz; iz++) { 0849 intptr_t doffset = 2*(ix*Ny*Nz + iz); 0850 intptr_t coffset = 2*(ix*Ny*Nz + iz); 0851 // Real part 0852 find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2*Nz, 0853 ((float*)spline->coefs)+coffset, 2*Nz); 0854 // Imag part 0855 find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2*Nz, 0856 ((float*)spline->coefs)+coffset+1, 2*Nz); 0857 } 0858 0859 // Now, solve in the Z-direction 0860 for (int ix=0; ix<Nx; ix++) 0861 for (int iy=0; iy<Ny; iy++) { 0862 intptr_t doffset = 2*((ix*Ny+iy)*Nz); 0863 intptr_t coffset = 2*((ix*Ny+iy)*Nz); 0864 // Real part 0865 find_coefs_1d_s (spline->z_grid, zBC_r, ((float*)spline->coefs)+doffset, 2, 0866 ((float*)spline->coefs)+coffset, 2); 0867 // Imag part 0868 find_coefs_1d_s (spline->z_grid, zBC_i, ((float*)spline->coefs)+doffset+1, 2, 0869 ((float*)spline->coefs)+coffset+1, 2); 0870 } 0871 0872 init_sse_data(); 0873 return spline; 0874 } 0875 0876 void 0877 recompute_UBspline_3d_c (UBspline_3d_c* spline, complex_float *data) 0878 { 0879 // Setup internal variables 0880 int Mx = spline->x_grid.num; 0881 int My = spline->y_grid.num; 0882 int Mz = spline->z_grid.num; 0883 int Nx, Ny, Nz; 0884 0885 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; 0886 else Nx = Mx+2; 0887 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; 0888 else Ny = My+2; 0889 if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) Nz = Mz+3; 0890 else Nz = Mz+2; 0891 0892 BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; 0893 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; 0894 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; 0895 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; 0896 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; 0897 yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; 0898 yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; 0899 yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; 0900 yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; 0901 zBC_r.lCode = spline->zBC.lCode; zBC_r.rCode = spline->zBC.rCode; 0902 zBC_r.lVal = spline->zBC.lVal_r; zBC_r.rVal = spline->zBC.rVal_r; 0903 zBC_i.lCode = spline->zBC.lCode; zBC_i.rCode = spline->zBC.rCode; 0904 zBC_i.lVal = spline->zBC.lVal_i; zBC_i.rVal = spline->zBC.rVal_i; 0905 // First, solve in the X-direction 0906 for (int iy=0; iy<My; iy++) 0907 for (int iz=0; iz<Mz; iz++) { 0908 intptr_t doffset = 2*(iy*Mz+iz); 0909 intptr_t coffset = 2*(iy*Nz+iz); 0910 // Real part 0911 find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My*Mz, 0912 ((float*)spline->coefs)+coffset, 2*Ny*Nz); 0913 // Imag part 0914 find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My*Mz, 0915 ((float*)spline->coefs)+coffset+1, 2*Ny*Nz); 0916 } 0917 0918 // Now, solve in the Y-direction 0919 for (int ix=0; ix<Nx; ix++) 0920 for (int iz=0; iz<Nz; iz++) { 0921 intptr_t doffset = 2*(ix*Ny*Nz + iz); 0922 intptr_t coffset = 2*(ix*Ny*Nz + iz); 0923 // Real part 0924 find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2*Nz, 0925 ((float*)spline->coefs)+coffset, 2*Nz); 0926 // Imag part 0927 find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2*Nz, 0928 ((float*)spline->coefs)+coffset+1, 2*Nz); 0929 } 0930 0931 // Now, solve in the Z-direction 0932 for (int ix=0; ix<Nx; ix++) 0933 for (int iy=0; iy<Ny; iy++) { 0934 intptr_t doffset = 2*((ix*Ny+iy)*Nz); 0935 intptr_t coffset = 2*((ix*Ny+iy)*Nz); 0936 // Real part 0937 find_coefs_1d_s (spline->z_grid, zBC_r, ((float*)spline->coefs)+doffset, 2, 0938 ((float*)spline->coefs)+coffset, 2); 0939 // Imag part 0940 find_coefs_1d_s (spline->z_grid, zBC_i, ((float*)spline->coefs)+doffset+1, 2, 0941 ((float*)spline->coefs)+coffset+1, 2); 0942 } 0943 } 0944 0945 0946 //////////////////////////////////////////////////////////// 0947 //////////////////////////////////////////////////////////// 0948 //// Double-Precision, Real Creation Routines //// 0949 //////////////////////////////////////////////////////////// 0950 //////////////////////////////////////////////////////////// 0951 0952 // On input, bands should be filled with: 0953 // row 0 : abcdInitial from boundary conditions 0954 // rows 1:M: basis functions in first 3 cols, data in last 0955 // row M+1 : abcdFinal from boundary conditions 0956 // cstride gives the stride between values in coefs. 0957 // On exit, coefs with contain interpolating B-spline coefs 0958 void 0959 solve_deriv_interp_1d_d (double bands[], double coefs[], 0960 int M, int cstride) 0961 { 0962 // Solve interpolating equations 0963 // First and last rows are different 0964 bands[4*(0)+1] /= bands[4*(0)+0]; 0965 bands[4*(0)+2] /= bands[4*(0)+0]; 0966 bands[4*(0)+3] /= bands[4*(0)+0]; 0967 bands[4*(0)+0] = 1.0; 0968 bands[4*(1)+1] -= bands[4*(1)+0]*bands[4*(0)+1]; 0969 bands[4*(1)+2] -= bands[4*(1)+0]*bands[4*(0)+2]; 0970 bands[4*(1)+3] -= bands[4*(1)+0]*bands[4*(0)+3]; 0971 bands[4*(0)+0] = 0.0; 0972 bands[4*(1)+2] /= bands[4*(1)+1]; 0973 bands[4*(1)+3] /= bands[4*(1)+1]; 0974 bands[4*(1)+1] = 1.0; 0975 0976 // Now do rows 2 through M+1 0977 for (int row=2; row < (M+1); row++) { 0978 bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2]; 0979 bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3]; 0980 bands[4*(row)+2] /= bands[4*(row)+1]; 0981 bands[4*(row)+3] /= bands[4*(row)+1]; 0982 bands[4*(row)+0] = 0.0; 0983 bands[4*(row)+1] = 1.0; 0984 } 0985 0986 // Do last row 0987 bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2]; 0988 bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3]; 0989 bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2]; 0990 bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3]; 0991 bands[4*(M+1)+3] /= bands[4*(M+1)+2]; 0992 bands[4*(M+1)+2] = 1.0; 0993 0994 coefs[(M+1)*cstride] = bands[4*(M+1)+3]; 0995 // Now back substitute up 0996 for (int row=M; row>0; row--) 0997 coefs[row*cstride] = bands[4*(row)+3] - bands[4*(row)+2]*coefs[cstride*(row+1)]; 0998 0999 // Finish with first row 1000 coefs[0] = bands[4*(0)+3] - bands[4*(0)+1]*coefs[1*cstride] - bands[4*(0)+2]*coefs[2*cstride]; 1001 } 1002 1003 // On input, bands should be filled with: 1004 // row 0 : abcdInitial from boundary conditions 1005 // rows 1:M: basis functions in first 3 cols, data in last 1006 // row M+1 : abcdFinal from boundary conditions 1007 // cstride gives the stride between values in coefs. 1008 // On exit, coefs with contain interpolating B-spline coefs 1009 void 1010 solve_periodic_interp_1d_d (double bands[], double coefs[], 1011 int M, int cstride) 1012 { 1013 std::vector<double> lastCol(M); 1014 1015 // Now solve: 1016 // First and last rows are different 1017 bands[4*(0)+2] /= bands[4*(0)+1]; 1018 bands[4*(0)+0] /= bands[4*(0)+1]; 1019 bands[4*(0)+3] /= bands[4*(0)+1]; 1020 bands[4*(0)+1] = 1.0; 1021 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0]; 1022 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3]; 1023 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(0)+2]; 1024 lastCol[0] = bands[4*(0)+0]; 1025 1026 for (int row=1; row < (M-1); row++) { 1027 bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2]; 1028 bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3]; 1029 lastCol[row] = -bands[4*(row)+0] * lastCol[row-1]; 1030 bands[4*(row)+0] = 0.0; 1031 bands[4*(row)+2] /= bands[4*(row)+1]; 1032 bands[4*(row)+3] /= bands[4*(row)+1]; 1033 lastCol[row] /= bands[4*(row)+1]; 1034 bands[4*(row)+1] = 1.0; 1035 if (row < (M-2)) { 1036 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3]; 1037 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row]; 1038 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2]; 1039 } 1040 } 1041 1042 // Now do last row 1043 // The [2] element and [0] element are now on top of each other 1044 bands[4*(M-1)+0] += bands[4*(M-1)+2]; 1045 bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]); 1046 bands[4*(M-1)+3] -= bands[4*(M-1)+0] * bands[4*(M-2)+3]; 1047 bands[4*(M-1)+3] /= bands[4*(M-1)+1]; 1048 coefs[M*cstride] = bands[4*(M-1)+3]; 1049 for (int row=M-2; row>=0; row--) 1050 coefs[(row+1)*cstride] = 1051 bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride]; 1052 1053 coefs[0*cstride] = coefs[M*cstride]; 1054 coefs[(M+1)*cstride] = coefs[1*cstride]; 1055 coefs[(M+2)*cstride] = coefs[2*cstride]; 1056 } 1057 1058 // On input, bands should be filled with: 1059 // row 0 : abcdInitial from boundary conditions 1060 // rows 1:M: basis functions in first 3 cols, data in last 1061 // row M+1 : abcdFinal from boundary conditions 1062 // cstride gives the stride between values in coefs. 1063 // On exit, coefs with contain interpolating B-spline coefs 1064 void 1065 solve_antiperiodic_interp_1d_d (double bands[], double coefs[], 1066 int M, int cstride) 1067 { 1068 std::vector<double> lastCol(M); 1069 1070 bands[4*0+0] *= -1.0; 1071 bands[4*(M-1)+2] *= -1.0; 1072 // Now solve: 1073 // First and last rows are different 1074 bands[4*(0)+2] /= bands[4*(0)+1]; 1075 bands[4*(0)+0] /= bands[4*(0)+1]; 1076 bands[4*(0)+3] /= bands[4*(0)+1]; 1077 bands[4*(0)+1] = 1.0; 1078 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0]; 1079 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3]; 1080 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(0)+2]; 1081 lastCol[0] = bands[4*(0)+0]; 1082 1083 for (int row=1; row < (M-1); row++) { 1084 bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2]; 1085 bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3]; 1086 lastCol[row] = -bands[4*(row)+0] * lastCol[row-1]; 1087 bands[4*(row)+0] = 0.0; 1088 bands[4*(row)+2] /= bands[4*(row)+1]; 1089 bands[4*(row)+3] /= bands[4*(row)+1]; 1090 lastCol[row] /= bands[4*(row)+1]; 1091 bands[4*(row)+1] = 1.0; 1092 if (row < (M-2)) { 1093 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3]; 1094 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row]; 1095 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2]; 1096 } 1097 } 1098 1099 // Now do last row 1100 // The [2] element and [0] element are now on top of each other 1101 bands[4*(M-1)+0] += bands[4*(M-1)+2]; 1102 bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]); 1103 bands[4*(M-1)+3] -= bands[4*(M-1)+0] * bands[4*(M-2)+3]; 1104 bands[4*(M-1)+3] /= bands[4*(M-1)+1]; 1105 coefs[M*cstride] = bands[4*(M-1)+3]; 1106 for (int row=M-2; row>=0; row--) 1107 coefs[(row+1)*cstride] = 1108 bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride]; 1109 1110 coefs[0*cstride] = -coefs[M*cstride]; 1111 coefs[(M+1)*cstride] = -coefs[1*cstride]; 1112 coefs[(M+2)*cstride] = -coefs[2*cstride]; 1113 } 1114 1115 1116 1117 void 1118 find_coefs_1d_d (Ugrid grid, BCtype_d bc, 1119 double *data, intptr_t dstride, 1120 double *coefs, intptr_t cstride) 1121 { 1122 int M = grid.num; 1123 double basis[4] = {1.0/6.0, 2.0/3.0, 1.0/6.0, 0.0}; 1124 if (bc.lCode == PERIODIC || bc.lCode == ANTIPERIODIC) { 1125 #ifdef HAVE_C_VARARRAYS 1126 double bands[M*4]; 1127 #else 1128 double *bands = new double[4*M]; 1129 #endif 1130 for (int i=0; i<M; i++) { 1131 bands[4*i+0] = basis[0]; 1132 bands[4*i+1] = basis[1]; 1133 bands[4*i+2] = basis[2]; 1134 bands[4*i+3] = data[i*dstride]; 1135 } 1136 if (bc.lCode == ANTIPERIODIC) 1137 solve_antiperiodic_interp_1d_d (bands, coefs, M, cstride); 1138 else 1139 solve_periodic_interp_1d_d (bands, coefs, M, cstride); 1140 1141 1142 #ifndef HAVE_C_VARARRAYS 1143 delete[] bands; 1144 #endif 1145 } 1146 else { 1147 // Setup boundary conditions 1148 double abcd_left[4]; 1149 double abcd_right[4]; 1150 for (int i = 0; i < 4; i++) { 1151 abcd_left[i] = 0.0; 1152 abcd_right[i] = 0.0; 1153 } 1154 // Left boundary 1155 if (bc.lCode == FLAT || bc.lCode == NATURAL) 1156 bc.lVal = 0.0; 1157 if (bc.lCode == FLAT || bc.lCode == DERIV1) { 1158 abcd_left[0] = -0.5 * grid.delta_inv; 1159 abcd_left[1] = 0.0 * grid.delta_inv; 1160 abcd_left[2] = 0.5 * grid.delta_inv; 1161 abcd_left[3] = bc.lVal; 1162 } 1163 if (bc.lCode == NATURAL || bc.lCode == DERIV2) { 1164 abcd_left[0] = 1.0 * grid.delta_inv * grid.delta_inv; 1165 abcd_left[1] =-2.0 * grid.delta_inv * grid.delta_inv; 1166 abcd_left[2] = 1.0 * grid.delta_inv * grid.delta_inv; 1167 abcd_left[3] = bc.lVal; 1168 } 1169 1170 // Right boundary 1171 if (bc.rCode == FLAT || bc.rCode == NATURAL) 1172 bc.rVal = 0.0; 1173 if (bc.rCode == FLAT || bc.rCode == DERIV1) { 1174 abcd_right[0] = -0.5 * grid.delta_inv; 1175 abcd_right[1] = 0.0 * grid.delta_inv; 1176 abcd_right[2] = 0.5 * grid.delta_inv; 1177 abcd_right[3] = bc.rVal; 1178 } 1179 if (bc.rCode == NATURAL || bc.rCode == DERIV2) { 1180 abcd_right[0] = 1.0 *grid.delta_inv * grid.delta_inv; 1181 abcd_right[1] =-2.0 *grid.delta_inv * grid.delta_inv; 1182 abcd_right[2] = 1.0 *grid.delta_inv * grid.delta_inv; 1183 abcd_right[3] = bc.rVal; 1184 } 1185 #ifdef HAVE_C_VARARRAYS 1186 double bands[(M+2)*4]; 1187 #else 1188 double *bands = new double[(M+2)*4]; 1189 #endif 1190 for (int i=0; i<4; i++) { 1191 bands[4*( 0 )+i] = abcd_left[i]; 1192 bands[4*(M+1)+i] = abcd_right[i]; 1193 } 1194 for (int i=0; i<M; i++) { 1195 for (int j=0; j<3; j++) 1196 bands[4*(i+1)+j] = basis[j]; 1197 bands[4*(i+1)+3] = data[i*dstride]; 1198 } 1199 // Now, solve for coefficients 1200 solve_deriv_interp_1d_d (bands, coefs, M, cstride); 1201 #ifndef HAVE_C_VARARRAYS 1202 delete[] bands; 1203 #endif 1204 } 1205 } 1206 1207 1208 1209 UBspline_1d_d* 1210 create_UBspline_1d_d (Ugrid x_grid, BCtype_d xBC, double *data) 1211 { 1212 // Create new spline 1213 UBspline_1d_d* restrict spline = static_cast<UBspline_1d_d*>(malloc(sizeof(UBspline_1d_d))); 1214 spline->spcode = U1D; 1215 spline->tcode = DOUBLE_REAL; 1216 spline->xBC = xBC; 1217 1218 // Setup internal variables 1219 int M = x_grid.num; 1220 int N; 1221 1222 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { 1223 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); 1224 N = M+3; 1225 } 1226 else { 1227 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); 1228 N = M+2; 1229 } 1230 1231 x_grid.delta_inv = 1.0/x_grid.delta; 1232 spline->x_grid = x_grid; 1233 1234 #ifndef HAVE_SSE2 1235 spline->coefs = (double*)malloc (sizeof(double)*N); 1236 #else 1237 posix_memalign ((void**)&spline->coefs, 16, sizeof(double)*N); 1238 #endif 1239 find_coefs_1d_d (spline->x_grid, xBC, data, 1, spline->coefs, 1); 1240 1241 init_sse_data(); 1242 return spline; 1243 } 1244 1245 void 1246 recompute_UBspline_1d_d (UBspline_1d_d* spline, double *data) 1247 { 1248 find_coefs_1d_d (spline->x_grid, spline->xBC, data, 1, spline->coefs, 1); 1249 } 1250 1251 1252 UBspline_2d_d* 1253 create_UBspline_2d_d (Ugrid x_grid, Ugrid y_grid, 1254 BCtype_d xBC, BCtype_d yBC, double *data) 1255 { 1256 // Create new spline 1257 UBspline_2d_d* restrict spline = static_cast<UBspline_2d_d*>(malloc(sizeof(UBspline_2d_d))); 1258 spline->spcode = U2D; 1259 spline->tcode = DOUBLE_REAL; 1260 spline->xBC = xBC; 1261 spline->yBC = yBC; 1262 1263 // Setup internal variables 1264 int Mx = x_grid.num; 1265 int My = y_grid.num; 1266 int Nx, Ny; 1267 1268 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; 1269 else Nx = Mx+2; 1270 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 1271 x_grid.delta_inv = 1.0/x_grid.delta; 1272 spline->x_grid = x_grid; 1273 1274 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; 1275 else Ny = My+2; 1276 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 1277 y_grid.delta_inv = 1.0/y_grid.delta; 1278 spline->y_grid = y_grid; 1279 spline->x_stride = Ny; 1280 1281 #ifndef HAVE_SSE2 1282 spline->coefs = (double*)malloc (sizeof(double)*Nx*Ny); 1283 #else 1284 posix_memalign ((void**)&spline->coefs, 16, (sizeof(double)*Nx*Ny)); 1285 #endif 1286 1287 // First, solve in the X-direction 1288 for (int iy=0; iy<My; iy++) { 1289 intptr_t doffset = iy; 1290 intptr_t coffset = iy; 1291 find_coefs_1d_d (spline->x_grid, xBC, data+doffset, My, 1292 spline->coefs+coffset, Ny); 1293 } 1294 1295 // Now, solve in the Y-direction 1296 for (int ix=0; ix<Nx; ix++) { 1297 intptr_t doffset = ix*Ny; 1298 intptr_t coffset = ix*Ny; 1299 find_coefs_1d_d (spline->y_grid, yBC, spline->coefs+doffset, 1, 1300 spline->coefs+coffset, 1); 1301 } 1302 1303 init_sse_data(); 1304 return spline; 1305 } 1306 1307 void 1308 recompute_UBspline_2d_d (UBspline_2d_d* spline, double *data) 1309 { 1310 int Mx = spline->x_grid.num; 1311 int My = spline->y_grid.num; 1312 int Nx, Ny; 1313 1314 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 1315 Nx = Mx+3; 1316 else 1317 Nx = Mx+2; 1318 1319 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) 1320 Ny = My+3; 1321 else 1322 Ny = My+2; 1323 1324 // First, solve in the X-direction 1325 for (int iy=0; iy<My; iy++) { 1326 intptr_t doffset = iy; 1327 intptr_t coffset = iy; 1328 find_coefs_1d_d (spline->x_grid, spline->xBC, data+doffset, My, 1329 spline->coefs+coffset, Ny); 1330 } 1331 1332 // Now, solve in the Y-direction 1333 for (int ix=0; ix<Nx; ix++) { 1334 intptr_t doffset = ix*Ny; 1335 intptr_t coffset = ix*Ny; 1336 find_coefs_1d_d (spline->y_grid, spline->yBC, spline->coefs+doffset, 1, 1337 spline->coefs+coffset, 1); 1338 } 1339 } 1340 1341 1342 UBspline_3d_d* 1343 create_UBspline_3d_d (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, 1344 BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, 1345 double *data) 1346 { 1347 // Create new spline 1348 UBspline_3d_d* restrict spline = static_cast<UBspline_3d_d*>(malloc(sizeof(UBspline_3d_d))); 1349 spline->spcode = U3D; 1350 spline->tcode = DOUBLE_REAL; 1351 spline->xBC = xBC; 1352 spline->yBC = yBC; 1353 spline->zBC = zBC; 1354 1355 // Setup internal variables 1356 int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; 1357 int Nx, Ny, Nz; 1358 1359 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; 1360 else Nx = Mx+2; 1361 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 1362 x_grid.delta_inv = 1.0/x_grid.delta; 1363 spline->x_grid = x_grid; 1364 1365 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; 1366 else Ny = My+2; 1367 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 1368 y_grid.delta_inv = 1.0/y_grid.delta; 1369 spline->y_grid = y_grid; 1370 1371 if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3; 1372 else Nz = Mz+2; 1373 z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); 1374 z_grid.delta_inv = 1.0/z_grid.delta; 1375 spline->z_grid = z_grid; 1376 1377 spline->x_stride = Ny*Nz; 1378 spline->y_stride = Nz; 1379 1380 #ifndef HAVE_SSE2 1381 spline->coefs = new double[Nx*Ny*Nz]; 1382 #else 1383 posix_memalign ((void**)&spline->coefs, 16, (sizeof(double)*Nx*Ny*Nz)); 1384 #endif 1385 1386 // First, solve in the X-direction 1387 for (int iy=0; iy<My; iy++) 1388 for (int iz=0; iz<Mz; iz++) { 1389 intptr_t doffset = iy*Mz+iz; 1390 intptr_t coffset = iy*Nz+iz; 1391 find_coefs_1d_d (spline->x_grid, xBC, data+doffset, My*Mz, 1392 spline->coefs+coffset, Ny*Nz); 1393 } 1394 1395 // Now, solve in the Y-direction 1396 for (int ix=0; ix<Nx; ix++) 1397 for (int iz=0; iz<Nz; iz++) { 1398 intptr_t doffset = ix*Ny*Nz + iz; 1399 intptr_t coffset = ix*Ny*Nz + iz; 1400 find_coefs_1d_d (spline->y_grid, yBC, spline->coefs+doffset, Nz, 1401 spline->coefs+coffset, Nz); 1402 } 1403 1404 // Now, solve in the Z-direction 1405 for (int ix=0; ix<Nx; ix++) 1406 for (int iy=0; iy<Ny; iy++) { 1407 intptr_t doffset = (ix*Ny+iy)*Nz; 1408 intptr_t coffset = (ix*Ny+iy)*Nz; 1409 find_coefs_1d_d (spline->z_grid, zBC, spline->coefs+doffset, 1, 1410 spline->coefs+coffset, 1); 1411 } 1412 1413 init_sse_data(); 1414 return spline; 1415 } 1416 1417 void 1418 recompute_UBspline_3d_d (UBspline_3d_d* spline, double *data) 1419 { 1420 int Mx = spline->x_grid.num; 1421 int My = spline->y_grid.num; 1422 int Mz = spline->z_grid.num; 1423 int Nx, Ny, Nz; 1424 1425 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 1426 Nx = Mx+3; 1427 else 1428 Nx = Mx+2; 1429 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) 1430 Ny = My+3; 1431 else 1432 Ny = My+2; 1433 if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) 1434 Nz = Mz+3; 1435 else 1436 Nz = Mz+2; 1437 1438 // First, solve in the X-direction 1439 for (int iy=0; iy<My; iy++) 1440 for (int iz=0; iz<Mz; iz++) { 1441 intptr_t doffset = iy*Mz+iz; 1442 intptr_t coffset = iy*Nz+iz; 1443 find_coefs_1d_d (spline->x_grid, spline->xBC, data+doffset, My*Mz, 1444 spline->coefs+coffset, Ny*Nz); 1445 } 1446 1447 // Now, solve in the Y-direction 1448 for (int ix=0; ix<Nx; ix++) 1449 for (int iz=0; iz<Nz; iz++) { 1450 intptr_t doffset = ix*Ny*Nz + iz; 1451 intptr_t coffset = ix*Ny*Nz + iz; 1452 find_coefs_1d_d (spline->y_grid, spline->yBC, spline->coefs+doffset, Nz, 1453 spline->coefs+coffset, Nz); 1454 } 1455 1456 // Now, solve in the Z-direction 1457 for (int ix=0; ix<Nx; ix++) 1458 for (int iy=0; iy<Ny; iy++) { 1459 intptr_t doffset = (ix*Ny+iy)*Nz; 1460 intptr_t coffset = (ix*Ny+iy)*Nz; 1461 find_coefs_1d_d (spline->z_grid, spline->zBC, spline->coefs+doffset, 1, 1462 spline->coefs+coffset, 1); 1463 } 1464 } 1465 1466 1467 //////////////////////////////////////////////////////////// 1468 //////////////////////////////////////////////////////////// 1469 //// Double-Precision, Complex Creation Routines //// 1470 //////////////////////////////////////////////////////////// 1471 //////////////////////////////////////////////////////////// 1472 1473 // On input, bands should be filled with: 1474 // row 0 : abcdInitial from boundary conditions 1475 // rows 1:M: basis functions in first 3 cols, data in last 1476 // row M+1 : abcdFinal from boundary conditions 1477 // cstride gives the stride between values in coefs. 1478 // On exit, coefs with contain interpolating B-spline coefs 1479 1480 1481 UBspline_1d_z* 1482 create_UBspline_1d_z (Ugrid x_grid, BCtype_z xBC, complex_double *data) 1483 { 1484 // Create new spline 1485 UBspline_1d_z* restrict spline = static_cast<UBspline_1d_z*>(malloc(sizeof(UBspline_1d_z))); 1486 spline->spcode = U1D; 1487 spline->tcode = DOUBLE_COMPLEX; 1488 spline->xBC = xBC; 1489 1490 // Setup internal variables 1491 int M = x_grid.num; 1492 int N; 1493 1494 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { 1495 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); 1496 N = M+3; 1497 } 1498 else { 1499 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); 1500 N = M+2; 1501 } 1502 1503 x_grid.delta_inv = 1.0/x_grid.delta; 1504 spline->x_grid = x_grid; 1505 #ifndef HAVE_SSE2 1506 spline->coefs = (complex_double*)malloc (2*sizeof(double)*N); 1507 #else 1508 posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*N); 1509 #endif 1510 1511 BCtype_d xBC_r, xBC_i; 1512 xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; 1513 xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; 1514 xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; 1515 xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; 1516 // Real part 1517 find_coefs_1d_d (spline->x_grid, xBC_r, (double*)data, 2, 1518 (double*)spline->coefs, 2); 1519 // Imaginarty part 1520 find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+1, 2, 1521 ((double*)spline->coefs)+1, 2); 1522 1523 init_sse_data(); 1524 return spline; 1525 } 1526 1527 void 1528 recompute_UBspline_1d_z (UBspline_1d_z* spline, complex_double *data) 1529 { 1530 // int M = spline->x_grid.num; 1531 // int N; 1532 1533 // if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 1534 // N = M+3; 1535 // else 1536 // N = M+2; 1537 1538 BCtype_d xBC_r, xBC_i; 1539 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; 1540 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; 1541 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; 1542 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; 1543 // Real part 1544 find_coefs_1d_d (spline->x_grid, xBC_r, (double*)data, 2, 1545 (double*)spline->coefs, 2); 1546 // Imaginarty part 1547 find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+1, 2, 1548 ((double*)spline->coefs)+1, 2); 1549 1550 } 1551 1552 1553 UBspline_2d_z* 1554 create_UBspline_2d_z (Ugrid x_grid, Ugrid y_grid, 1555 BCtype_z xBC, BCtype_z yBC, complex_double *data) 1556 { 1557 // Create new spline 1558 UBspline_2d_z* restrict spline = static_cast<UBspline_2d_z*>(malloc(sizeof(UBspline_2d_z))); 1559 spline->spcode = U2D; 1560 spline->tcode = DOUBLE_COMPLEX; 1561 spline->xBC = xBC; 1562 spline->yBC = yBC; 1563 1564 // Setup internal variables 1565 int Mx = x_grid.num; 1566 int My = y_grid.num; 1567 int Nx, Ny; 1568 1569 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) 1570 Nx = Mx+3; 1571 else 1572 Nx = Mx+2; 1573 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 1574 x_grid.delta_inv = 1.0/x_grid.delta; 1575 spline->x_grid = x_grid; 1576 1577 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) 1578 Ny = My+3; 1579 else 1580 Ny = My+2; 1581 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 1582 y_grid.delta_inv = 1.0/y_grid.delta; 1583 spline->y_grid = y_grid; 1584 spline->x_stride = Ny; 1585 1586 #ifndef HAVE_SSE2 1587 spline->coefs = (complex_double*)malloc (2*sizeof(double)*Nx*Ny); 1588 #else 1589 posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*Nx*Ny); 1590 #endif 1591 1592 BCtype_d xBC_r, xBC_i, yBC_r, yBC_i; 1593 xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; 1594 xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; 1595 xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; 1596 xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; 1597 yBC_r.lCode = yBC.lCode; yBC_r.rCode = yBC.rCode; 1598 yBC_r.lVal = yBC.lVal_r; yBC_r.rVal = yBC.rVal_r; 1599 yBC_i.lCode = yBC.lCode; yBC_i.rCode = yBC.rCode; 1600 yBC_i.lVal = yBC.lVal_i; yBC_i.rVal = yBC.rVal_i; 1601 // First, solve in the X-direction 1602 for (int iy=0; iy<My; iy++) { 1603 intptr_t doffset = 2*iy; 1604 intptr_t coffset = 2*iy; 1605 // Real part 1606 find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data+doffset), 2*My, 1607 (double*)spline->coefs+coffset, 2*Ny); 1608 // Imag part 1609 find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My, 1610 ((double*)spline->coefs)+coffset+1, 2*Ny); 1611 } 1612 1613 // Now, solve in the Y-direction 1614 for (int ix=0; ix<Nx; ix++) { 1615 intptr_t doffset = 2*ix*Ny; 1616 intptr_t coffset = 2*ix*Ny; 1617 // Real part 1618 find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2, 1619 (double*)spline->coefs+coffset, 2); 1620 // Imag part 1621 find_coefs_1d_d (spline->y_grid, yBC_i, (double*)spline->coefs+doffset+1, 2, 1622 ((double*)spline->coefs)+coffset+1, 2); 1623 } 1624 1625 init_sse_data(); 1626 return spline; 1627 } 1628 1629 1630 void 1631 recompute_UBspline_2d_z (UBspline_2d_z* spline, complex_double *data) 1632 { 1633 int Mx = spline->x_grid.num; 1634 int My = spline->y_grid.num; 1635 int Nx, Ny; 1636 1637 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 1638 Nx = Mx+3; 1639 else 1640 Nx = Mx+2; 1641 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) 1642 Ny = My+3; 1643 else 1644 Ny = My+2; 1645 1646 BCtype_d xBC_r, xBC_i, yBC_r, yBC_i; 1647 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; 1648 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; 1649 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; 1650 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; 1651 yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; 1652 yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; 1653 yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; 1654 yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; 1655 1656 // First, solve in the X-direction 1657 for (int iy=0; iy<My; iy++) { 1658 intptr_t doffset = 2*iy; 1659 intptr_t coffset = 2*iy; 1660 // Real part 1661 find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data+doffset), 2*My, 1662 (double*)spline->coefs+coffset, 2*Ny); 1663 // Imag part 1664 find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My, 1665 ((double*)spline->coefs)+coffset+1, 2*Ny); 1666 } 1667 1668 // Now, solve in the Y-direction 1669 for (int ix=0; ix<Nx; ix++) { 1670 intptr_t doffset = 2*ix*Ny; 1671 intptr_t coffset = 2*ix*Ny; 1672 // Real part 1673 find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2, 1674 (double*)spline->coefs+coffset, 2); 1675 // Imag part 1676 find_coefs_1d_d (spline->y_grid, yBC_i, (double*)spline->coefs+doffset+1, 2, 1677 ((double*)spline->coefs)+coffset+1, 2); 1678 } 1679 } 1680 1681 1682 1683 UBspline_3d_z* 1684 create_UBspline_3d_z (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, 1685 BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, 1686 complex_double *data) 1687 { 1688 // Create new spline 1689 UBspline_3d_z* restrict spline = static_cast<UBspline_3d_z*>(malloc(sizeof(UBspline_3d_z))); 1690 spline->spcode = U3D; 1691 spline->tcode = DOUBLE_COMPLEX; 1692 spline->xBC = xBC; 1693 spline->yBC = yBC; 1694 spline->zBC = zBC; 1695 1696 // Setup internal variables 1697 int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; 1698 int Nx, Ny, Nz; 1699 1700 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; 1701 else Nx = Mx+2; 1702 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 1703 x_grid.delta_inv = 1.0/x_grid.delta; 1704 spline->x_grid = x_grid; 1705 1706 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; 1707 else Ny = My+2; 1708 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 1709 y_grid.delta_inv = 1.0/y_grid.delta; 1710 spline->y_grid = y_grid; 1711 1712 if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3; 1713 else Nz = Mz+2; 1714 z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); 1715 z_grid.delta_inv = 1.0/z_grid.delta; 1716 spline->z_grid = z_grid; 1717 1718 spline->x_stride = Ny*Nz; 1719 spline->y_stride = Nz; 1720 1721 #ifndef HAVE_SSE2 1722 spline->coefs = new complex_double[Nx*Ny*Nz]; 1723 #else 1724 posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*Nx*Ny*Nz); 1725 #endif 1726 1727 BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; 1728 xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; 1729 xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; 1730 xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; 1731 xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; 1732 yBC_r.lCode = yBC.lCode; yBC_r.rCode = yBC.rCode; 1733 yBC_r.lVal = yBC.lVal_r; yBC_r.rVal = yBC.rVal_r; 1734 yBC_i.lCode = yBC.lCode; yBC_i.rCode = yBC.rCode; 1735 yBC_i.lVal = yBC.lVal_i; yBC_i.rVal = yBC.rVal_i; 1736 zBC_r.lCode = zBC.lCode; zBC_r.rCode = zBC.rCode; 1737 zBC_r.lVal = zBC.lVal_r; zBC_r.rVal = zBC.rVal_r; 1738 zBC_i.lCode = zBC.lCode; zBC_i.rCode = zBC.rCode; 1739 zBC_i.lVal = zBC.lVal_i; zBC_i.rVal = zBC.rVal_i; 1740 // First, solve in the X-direction 1741 for (int iy=0; iy<My; iy++) 1742 for (int iz=0; iz<Mz; iz++) { 1743 intptr_t doffset = 2*(iy*Mz+iz); 1744 intptr_t coffset = 2*(iy*Nz+iz); 1745 // Real part 1746 find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data)+doffset, 2*My*Mz, 1747 ((double*)spline->coefs)+coffset, 2*Ny*Nz); 1748 // Imag part 1749 find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My*Mz, 1750 ((double*)spline->coefs)+coffset+1, 2*Ny*Nz); 1751 } 1752 1753 // Now, solve in the Y-direction 1754 for (int ix=0; ix<Nx; ix++) 1755 for (int iz=0; iz<Nz; iz++) { 1756 intptr_t doffset = 2*(ix*Ny*Nz + iz); 1757 intptr_t coffset = 2*(ix*Ny*Nz + iz); 1758 // Real part 1759 find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2*Nz, 1760 ((double*)spline->coefs)+coffset, 2*Nz); 1761 // Imag part 1762 find_coefs_1d_d (spline->y_grid, yBC_i, ((double*)spline->coefs)+doffset+1, 2*Nz, 1763 ((double*)spline->coefs)+coffset+1, 2*Nz); 1764 } 1765 1766 // Now, solve in the Z-direction 1767 for (int ix=0; ix<Nx; ix++) 1768 for (int iy=0; iy<Ny; iy++) { 1769 intptr_t doffset = 2*((ix*Ny+iy)*Nz); 1770 intptr_t coffset = 2*((ix*Ny+iy)*Nz); 1771 // Real part 1772 find_coefs_1d_d (spline->z_grid, zBC_r, ((double*)spline->coefs)+doffset, 2, 1773 ((double*)spline->coefs)+coffset, 2); 1774 // Imag part 1775 find_coefs_1d_d (spline->z_grid, zBC_i, ((double*)spline->coefs)+doffset+1, 2, 1776 ((double*)spline->coefs)+coffset+1, 2); 1777 } 1778 init_sse_data(); 1779 return spline; 1780 } 1781 1782 void 1783 recompute_UBspline_3d_z (UBspline_3d_z* spline, complex_double *data) 1784 { 1785 // Setup internal variables 1786 int Mx = spline->x_grid.num; 1787 int My = spline->y_grid.num; 1788 int Mz = spline->z_grid.num; 1789 int Nx, Ny, Nz; 1790 1791 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 1792 Nx = Mx+3; 1793 else 1794 Nx = Mx+2; 1795 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) 1796 Ny = My+3; 1797 else 1798 Ny = My+2; 1799 if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) 1800 Nz = Mz+3; 1801 else 1802 Nz = Mz+2; 1803 1804 BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; 1805 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; 1806 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; 1807 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; 1808 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; 1809 yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; 1810 yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; 1811 yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; 1812 yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; 1813 zBC_r.lCode = spline->zBC.lCode; zBC_r.rCode = spline->zBC.rCode; 1814 zBC_r.lVal = spline->zBC.lVal_r; zBC_r.rVal = spline->zBC.rVal_r; 1815 zBC_i.lCode = spline->zBC.lCode; zBC_i.rCode = spline->zBC.rCode; 1816 zBC_i.lVal = spline->zBC.lVal_i; zBC_i.rVal = spline->zBC.rVal_i; 1817 // First, solve in the X-direction 1818 for (int iy=0; iy<My; iy++) 1819 for (int iz=0; iz<Mz; iz++) { 1820 intptr_t doffset = 2*(iy*Mz+iz); 1821 intptr_t coffset = 2*(iy*Nz+iz); 1822 // Real part 1823 find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data)+doffset, 2*My*Mz, 1824 ((double*)spline->coefs)+coffset, 2*Ny*Nz); 1825 // Imag part 1826 find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My*Mz, 1827 ((double*)spline->coefs)+coffset+1, 2*Ny*Nz); 1828 } 1829 1830 // Now, solve in the Y-direction 1831 for (int ix=0; ix<Nx; ix++) 1832 for (int iz=0; iz<Nz; iz++) { 1833 intptr_t doffset = 2*(ix*Ny*Nz + iz); 1834 intptr_t coffset = 2*(ix*Ny*Nz + iz); 1835 // Real part 1836 find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2*Nz, 1837 ((double*)spline->coefs)+coffset, 2*Nz); 1838 // Imag part 1839 find_coefs_1d_d (spline->y_grid, yBC_i, ((double*)spline->coefs)+doffset+1, 2*Nz, 1840 ((double*)spline->coefs)+coffset+1, 2*Nz); 1841 } 1842 1843 // Now, solve in the Z-direction 1844 for (int ix=0; ix<Nx; ix++) 1845 for (int iy=0; iy<Ny; iy++) { 1846 intptr_t doffset = 2*((ix*Ny+iy)*Nz); 1847 intptr_t coffset = 2*((ix*Ny+iy)*Nz); 1848 // Real part 1849 find_coefs_1d_d (spline->z_grid, zBC_r, ((double*)spline->coefs)+doffset, 2, 1850 ((double*)spline->coefs)+coffset, 2); 1851 // Imag part 1852 find_coefs_1d_d (spline->z_grid, zBC_i, ((double*)spline->coefs)+doffset+1, 2, 1853 ((double*)spline->coefs)+coffset+1, 2); 1854 } 1855 } 1856 1857 1858 void 1859 destroy_UBspline (Bspline *spline) 1860 { 1861 free(spline->coefs); 1862 free(spline); 1863 } 1864 1865 void 1866 destroy_NUBspline (Bspline *spline); 1867 1868 void 1869 destroy_multi_UBspline (Bspline *spline); 1870 1871 void 1872 destroy_Bspline (void *spline) 1873 { 1874 Bspline *sp = (Bspline *)spline; 1875 if (sp->sp_code <= U3D) 1876 destroy_UBspline (sp); 1877 else if (sp->sp_code <= NU3D) 1878 destroy_NUBspline (sp); 1879 else if (sp->sp_code <= MULTI_U3D) 1880 destroy_multi_UBspline (sp); 1881 else 1882 fprintf (stderr, "Error in destroy_Bspline: invalid spline code %d.\n", 1883 sp->sp_code); 1884 }