File indexing completed on 2024-12-22 04:09:45
0001 ///////////////////////////////////////////////////////////////////////////// 0002 // einspline: a library for creating and evaluating B-splines // 0003 // Copyright (C) 2007 Kenneth P. Esler, Jr. // 0004 // // 0005 // This program is free software; you can redistribute it and/or modify // 0006 // it under the terms of the GNU General Public License as published by // 0007 // the Free Software Foundation; either version 2 of the License, or // 0008 // (at your option) any later version. // 0009 // // 0010 // This program is distributed in the hope that it will be useful, // 0011 // but WITHOUT ANY WARRANTY; without even the implied warranty of // 0012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // 0013 // GNU General Public License for more details. // 0014 // // 0015 // You should have received a copy of the GNU General Public License // 0016 // along with this program; if not, write to the Free Software // 0017 // Foundation, Inc., 51 Franklin Street, Fifth Floor, // 0018 // Boston, MA 02110-1301 USA // 0019 ///////////////////////////////////////////////////////////////////////////// 0020 0021 #include "multi_bspline_create.h" 0022 #ifndef _XOPEN_SOURCE 0023 #define _XOPEN_SOURCE 600 0024 #endif 0025 #ifndef __USE_XOPEN2K 0026 #define __USE_XOPEN2K 0027 #endif 0028 #include <stdlib.h> 0029 #include <stdio.h> 0030 0031 int posix_memalign(void **memptr, size_t alignment, size_t size); 0032 0033 //////////////////////////////////////////////////////////// 0034 //////////////////////////////////////////////////////////// 0035 //// Helper functions for spline creation //// 0036 //////////////////////////////////////////////////////////// 0037 //////////////////////////////////////////////////////////// 0038 void init_sse_data(); 0039 0040 void 0041 find_coefs_1d_d (Ugrid grid, BCtype_d bc, 0042 double *data, intptr_t dstride, 0043 double *coefs, intptr_t cstride); 0044 0045 void 0046 solve_deriv_interp_1d_s (float bands[], float coefs[], 0047 int M, int cstride); 0048 0049 // On input, bands should be filled with: 0050 // row 0 : abcdInitial from boundary conditions 0051 // rows 1:M: basis functions in first 3 cols, data in last 0052 // row M+1 : abcdFinal from boundary conditions 0053 // cstride gives the stride between values in coefs. 0054 // On exit, coefs with contain interpolating B-spline coefs 0055 void 0056 solve_periodic_interp_1d_s (float bands[], float coefs[], 0057 int M, int cstride); 0058 0059 // On input, bands should be filled with: 0060 // row 0 : abcdInitial from boundary conditions 0061 // rows 1:M: basis functions in first 3 cols, data in last 0062 // row M+1 : abcdFinal from boundary conditions 0063 // cstride gives the stride between values in coefs. 0064 // On exit, coefs with contain interpolating B-spline coefs 0065 void 0066 solve_antiperiodic_interp_1d_s (float bands[], float coefs[], 0067 int M, int cstride); 0068 0069 void 0070 find_coefs_1d_s (Ugrid grid, BCtype_s bc, 0071 float *data, intptr_t dstride, 0072 float *coefs, intptr_t cstride); 0073 0074 //////////////////////////////////////////////////////////// 0075 //////////////////////////////////////////////////////////// 0076 //// Single-Precision, Real Creation Routines //// 0077 //////////////////////////////////////////////////////////// 0078 //////////////////////////////////////////////////////////// 0079 0080 // On input, bands should be filled with: 0081 // row 0 : abcdInitial from boundary conditions 0082 // rows 1:M: basis functions in first 3 cols, data in last 0083 // row M+1 : abcdFinal from boundary conditions 0084 // cstride gives the stride between values in coefs. 0085 // On exit, coefs with contain interpolating B-spline coefs 0086 multi_UBspline_1d_s* 0087 create_multi_UBspline_1d_s (Ugrid x_grid, BCtype_s xBC, int num_splines) 0088 { 0089 // Create new spline 0090 multi_UBspline_1d_s* restrict spline = static_cast<multi_UBspline_1d_s*>(malloc(sizeof(multi_UBspline_1d_s))); 0091 if (!spline) { 0092 fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_1d_s.\n"); 0093 abort(); 0094 } 0095 spline->spcode = MULTI_U1D; 0096 spline->tcode = SINGLE_REAL; 0097 spline->xBC = xBC; spline->x_grid = x_grid; 0098 spline->num_splines = num_splines; 0099 0100 // Setup internal variables 0101 int Mx = x_grid.num; 0102 int Nx; 0103 0104 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { 0105 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); 0106 Nx = Mx+3; 0107 } 0108 else { 0109 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); 0110 Nx = Mx+2; 0111 } 0112 0113 int N = num_splines; 0114 #ifdef HAVE_SSE 0115 if (N % 4) 0116 N += 4 - (N % 4); 0117 #endif 0118 0119 spline->x_stride = N; 0120 x_grid.delta_inv = 1.0/x_grid.delta; 0121 spline->x_grid = x_grid; 0122 #ifndef HAVE_POSIX_MEMALIGN 0123 spline->coefs = (float*)malloc (sizeof(float)*Nx*N); 0124 #else 0125 posix_memalign ((void**)&spline->coefs, 64, (sizeof(float)*Nx*N)); 0126 #endif 0127 #ifdef HAVE_SSE 0128 init_sse_data(); 0129 #endif 0130 if (!spline->coefs) { 0131 fprintf (stderr, "Out of memory allocating spline coefficient in create_multi_UBspline_1d_s.\n"); 0132 abort(); 0133 } 0134 0135 0136 return spline; 0137 } 0138 0139 void 0140 set_multi_UBspline_1d_s (multi_UBspline_1d_s *spline, int num, 0141 float *data) 0142 { 0143 float *coefs = spline->coefs + num; 0144 int xs = spline->x_stride; 0145 find_coefs_1d_s (spline->x_grid, spline->xBC, data, 1, 0146 coefs, xs); 0147 } 0148 0149 0150 multi_UBspline_2d_s* 0151 create_multi_UBspline_2d_s (Ugrid x_grid, Ugrid y_grid, 0152 BCtype_s xBC, BCtype_s yBC, int num_splines) 0153 { 0154 // Create new spline 0155 multi_UBspline_2d_s* restrict spline = static_cast<multi_UBspline_2d_s*>(malloc(sizeof(multi_UBspline_2d_s))); 0156 if (!spline) { 0157 fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_2d_s.\n"); 0158 abort(); 0159 } 0160 spline->spcode = MULTI_U2D; 0161 spline->tcode = SINGLE_REAL; 0162 spline->xBC = xBC; 0163 spline->yBC = yBC; 0164 spline->num_splines = num_splines; 0165 // Setup internal variables 0166 int Mx = x_grid.num; 0167 int My = y_grid.num; 0168 int Nx, Ny; 0169 0170 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) 0171 Nx = Mx+3; 0172 else 0173 Nx = Mx+2; 0174 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 0175 0176 x_grid.delta_inv = 1.0/x_grid.delta; 0177 spline->x_grid = x_grid; 0178 0179 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) 0180 Ny = My+3; 0181 else 0182 Ny = My+2; 0183 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 0184 y_grid.delta_inv = 1.0/y_grid.delta; 0185 spline->y_grid = y_grid; 0186 0187 int N = num_splines; 0188 #ifdef HAVE_SSE 0189 if (N % 4) 0190 N += 4 - (N % 4); 0191 #endif 0192 0193 0194 spline->x_stride = Ny*N; 0195 spline->y_stride = N; 0196 0197 #ifndef HAVE_POSIX_MEMALIGN 0198 spline->coefs = (float*)malloc (sizeof(float)*Nx*Ny*N); 0199 #else 0200 posix_memalign ((void**)&spline->coefs, 64, 0201 sizeof(float)*Nx*Ny*N); 0202 #endif 0203 #ifdef HAVE_SSE 0204 init_sse_data(); 0205 #endif 0206 if (!spline->coefs) { 0207 fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_2d_s.\n"); 0208 abort(); 0209 } 0210 0211 return spline; 0212 } 0213 0214 void 0215 set_multi_UBspline_2d_s (multi_UBspline_2d_s* spline, int num, float *data) 0216 { 0217 int Mx = spline->x_grid.num; 0218 int My = spline->y_grid.num; 0219 int Nx, Ny; 0220 0221 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 0222 Nx = Mx+3; 0223 else 0224 Nx = Mx+2; 0225 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) 0226 Ny = My+3; 0227 else 0228 Ny = My+2; 0229 0230 float *coefs = spline->coefs + num; 0231 int ys = spline->y_stride; 0232 // First, solve in the X-direction 0233 for (int iy=0; iy<My; iy++) { 0234 intptr_t doffset = iy; 0235 intptr_t coffset = iy*ys; 0236 find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, (intptr_t)My, 0237 coefs+coffset, (intptr_t)Ny*ys); 0238 } 0239 0240 // Now, solve in the Y-direction 0241 for (int ix=0; ix<Nx; ix++) { 0242 intptr_t doffset = ix*Ny*ys; 0243 intptr_t coffset = ix*Ny*ys; 0244 find_coefs_1d_s (spline->y_grid, spline->yBC, coefs+doffset, (intptr_t)ys, 0245 coefs+coffset, (intptr_t)ys); 0246 } 0247 } 0248 0249 0250 multi_UBspline_3d_s* 0251 create_multi_UBspline_3d_s (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, 0252 BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, 0253 int num_splines) 0254 { 0255 // Create new spline 0256 multi_UBspline_3d_s* restrict spline = static_cast<multi_UBspline_3d_s*>(malloc(sizeof(multi_UBspline_3d_s))); 0257 if (!spline) { 0258 fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_3d_s.\n"); 0259 abort(); 0260 } 0261 spline->spcode = MULTI_U3D; 0262 spline->tcode = SINGLE_REAL; 0263 spline->xBC = xBC; 0264 spline->yBC = yBC; 0265 spline->zBC = zBC; 0266 spline->num_splines = num_splines; 0267 // Setup internal variables 0268 int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; 0269 int Nx, Ny, Nz; 0270 0271 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) 0272 Nx = Mx+3; 0273 else 0274 Nx = Mx+2; 0275 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 0276 x_grid.delta_inv = 1.0/x_grid.delta; 0277 spline->x_grid = x_grid; 0278 0279 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) 0280 Ny = My+3; 0281 else 0282 Ny = My+2; 0283 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 0284 y_grid.delta_inv = 1.0/y_grid.delta; 0285 spline->y_grid = y_grid; 0286 0287 if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) 0288 Nz = Mz+3; 0289 else 0290 Nz = Mz+2; 0291 z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); 0292 z_grid.delta_inv = 1.0/z_grid.delta; 0293 spline->z_grid = z_grid; 0294 0295 int N = num_splines; 0296 #ifdef HAVE_SSE 0297 if (N % 4) 0298 N += 4 - (N % 4); 0299 #endif 0300 0301 spline->x_stride = Ny*Nz*N; 0302 spline->y_stride = Nz*N; 0303 spline->z_stride = N; 0304 0305 #ifndef HAVE_POSIX_MEMALIGN 0306 spline->coefs = new float[Nx*Ny*Nz*N]; 0307 #else 0308 posix_memalign ((void**)&spline->coefs, 64, 0309 (sizeof(float)*Nx*Ny*Nz*N)); 0310 #endif 0311 #ifdef HAVE_SSE 0312 init_sse_data(); 0313 #endif 0314 if (!spline->coefs) { 0315 fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_3d_s.\n"); 0316 abort(); 0317 } 0318 0319 return spline; 0320 } 0321 0322 void 0323 set_multi_UBspline_3d_s (multi_UBspline_3d_s* spline, int num, float *data) 0324 { 0325 int Mx = spline->x_grid.num; 0326 int My = spline->y_grid.num; 0327 int Mz = spline->z_grid.num; 0328 int Nx, Ny, Nz; 0329 0330 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 0331 Nx = Mx+3; 0332 else 0333 Nx = Mx+2; 0334 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) 0335 Ny = My+3; 0336 else 0337 Ny = My+2; 0338 if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) 0339 Nz = Mz+3; 0340 else 0341 Nz = Mz+2; 0342 0343 float *coefs = spline->coefs + num; 0344 0345 int zs = spline->z_stride; 0346 // First, solve in the X-direction 0347 for (int iy=0; iy<My; iy++) 0348 for (int iz=0; iz<Mz; iz++) { 0349 intptr_t doffset = iy*Mz+iz; 0350 intptr_t coffset = (iy*Nz+iz)*zs; 0351 find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, (intptr_t)My*Mz, 0352 coefs+coffset, (intptr_t)Ny*Nz*zs); 0353 } 0354 0355 // Now, solve in the Y-direction 0356 for (int ix=0; ix<Nx; ix++) 0357 for (int iz=0; iz<Nz; iz++) { 0358 intptr_t doffset = (ix*Ny*Nz + iz)*zs; 0359 intptr_t coffset = (ix*Ny*Nz + iz)*zs; 0360 find_coefs_1d_s (spline->y_grid, spline->yBC, coefs+doffset, (intptr_t)Nz*zs, 0361 coefs+coffset, (intptr_t)Nz*zs); 0362 } 0363 0364 // Now, solve in the Z-direction 0365 for (int ix=0; ix<Nx; ix++) 0366 for (int iy=0; iy<Ny; iy++) { 0367 intptr_t doffset = ((ix*Ny+iy)*Nz)*zs; 0368 intptr_t coffset = ((ix*Ny+iy)*Nz)*zs; 0369 find_coefs_1d_s (spline->z_grid, spline->zBC, coefs+doffset, 0370 (intptr_t)zs, coefs+coffset, (intptr_t)zs); 0371 } 0372 } 0373 0374 0375 //////////////////////////////////////////////////////////// 0376 //////////////////////////////////////////////////////////// 0377 //// Single-Precision, Complex Creation Routines //// 0378 //////////////////////////////////////////////////////////// 0379 //////////////////////////////////////////////////////////// 0380 0381 // On input, bands should be filled with: 0382 // row 0 : abcdInitial from boundary conditions 0383 // rows 1:M: basis functions in first 3 cols, data in last 0384 // row M+1 : abcdFinal from boundary conditions 0385 // cstride gives the stride between values in coefs. 0386 // On exit, coefs with contain interpolating B-spline coefs 0387 multi_UBspline_1d_c* 0388 create_multi_UBspline_1d_c (Ugrid x_grid, BCtype_c xBC, int num_splines) 0389 { 0390 // Create new spline 0391 multi_UBspline_1d_c* restrict spline = static_cast<multi_UBspline_1d_c*>(malloc(sizeof(multi_UBspline_1d_c))); 0392 if (!spline) { 0393 fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_1d_c.\n"); 0394 abort(); 0395 } 0396 spline->spcode = MULTI_U1D; 0397 spline->tcode = SINGLE_COMPLEX; 0398 spline->xBC = xBC; 0399 spline->num_splines = num_splines; 0400 // Setup internal variables 0401 int M = x_grid.num; 0402 int N; 0403 0404 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { 0405 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); 0406 N = M+3; 0407 } 0408 else { 0409 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); 0410 N = M+2; 0411 } 0412 0413 x_grid.delta_inv = 1.0/x_grid.delta; 0414 spline->x_grid = x_grid; 0415 spline->x_stride = num_splines; 0416 #ifndef HAVE_POSIX_MEMALIGN 0417 spline->coefs = (complex_float*)malloc (2*sizeof(float)*N*num_splines); 0418 #else 0419 posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(float)*N*num_splines); 0420 #endif 0421 #ifdef HAVE_SSE 0422 init_sse_data(); 0423 #endif 0424 if (!spline->coefs) { 0425 fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_1d_c.\n"); 0426 abort(); 0427 } 0428 0429 return spline; 0430 } 0431 0432 void 0433 set_multi_UBspline_1d_c (multi_UBspline_1d_c* spline, int num, complex_float *data) 0434 { 0435 complex_float *coefs = spline->coefs + num; 0436 0437 BCtype_s xBC_r, xBC_i; 0438 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; 0439 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; 0440 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; 0441 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; 0442 0443 int xs = spline->x_stride; 0444 // Real part 0445 find_coefs_1d_s (spline->x_grid, xBC_r, 0446 (float*)data, (intptr_t)2, (float*)coefs, (intptr_t)2*xs); 0447 // Imaginarty part 0448 find_coefs_1d_s (spline->x_grid, xBC_i, 0449 ((float*)data)+1, (intptr_t)2, ((float*)coefs+1), (intptr_t)2*xs); 0450 } 0451 0452 0453 0454 multi_UBspline_2d_c* 0455 create_multi_UBspline_2d_c (Ugrid x_grid, Ugrid y_grid, 0456 BCtype_c xBC, BCtype_c yBC, int num_splines) 0457 { 0458 // Create new spline 0459 multi_UBspline_2d_c* restrict spline = static_cast<multi_UBspline_2d_c*>(malloc(sizeof(multi_UBspline_2d_c))); 0460 if (!spline) { 0461 fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_2d_c.\n"); 0462 abort(); 0463 } 0464 spline->spcode = MULTI_U2D; 0465 spline->tcode = SINGLE_COMPLEX; 0466 spline->xBC = xBC; 0467 spline->yBC = yBC; 0468 spline->num_splines = num_splines; 0469 0470 // Setup internal variables 0471 int Mx = x_grid.num; 0472 int My = y_grid.num; 0473 int Nx, Ny; 0474 0475 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) 0476 Nx = Mx+3; 0477 else 0478 Nx = Mx+2; 0479 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 0480 x_grid.delta_inv = 1.0/x_grid.delta; 0481 spline->x_grid = x_grid; 0482 0483 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) 0484 Ny = My+3; 0485 else 0486 Ny = My+2; 0487 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 0488 y_grid.delta_inv = 1.0/y_grid.delta; 0489 spline->y_grid = y_grid; 0490 0491 int N = num_splines; 0492 #ifdef HAVE_SSE 0493 if (N % 2) 0494 N++; 0495 #endif 0496 0497 spline->x_stride = Ny*N; 0498 spline->y_stride = N; 0499 0500 #ifndef HAVE_POSIX_MEMALIGN 0501 spline->coefs = (complex_float*)malloc (2*sizeof(float)*Nx*Ny*N); 0502 spline->lapl2 = (complex_float*)malloc (4*sizeof(float)*N); 0503 #else 0504 posix_memalign ((void**)&spline->coefs, 64, 0505 2*sizeof(float)*Nx*Ny*N); 0506 posix_memalign ((void**)&spline->lapl2, 64, 0507 4*sizeof(float)*N); 0508 #endif 0509 #ifdef HAVE_SSE 0510 init_sse_data(); 0511 #endif 0512 if (!spline->coefs || !spline->lapl2) { 0513 fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_2d_c.\n"); 0514 abort(); 0515 } 0516 return spline; 0517 } 0518 0519 0520 void 0521 set_multi_UBspline_2d_c (multi_UBspline_2d_c* spline, int num, complex_float *data) 0522 { 0523 // Setup internal variables 0524 int Mx = spline->x_grid.num; 0525 int My = spline->y_grid.num; 0526 int Nx, Ny; 0527 0528 complex_float* coefs = spline->coefs + num; 0529 0530 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 0531 Nx = Mx+3; 0532 else 0533 Nx = Mx+2; 0534 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) 0535 Ny = My+3; 0536 else 0537 Ny = My+2; 0538 0539 BCtype_s xBC_r, xBC_i, yBC_r, yBC_i; 0540 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; 0541 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; 0542 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; 0543 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; 0544 yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; 0545 yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; 0546 yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; 0547 yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; 0548 0549 int ys = spline->y_stride; 0550 // First, solve in the X-direction 0551 for (int iy=0; iy<My; iy++) { 0552 intptr_t doffset = (2*iy); 0553 intptr_t coffset = (2*iy)*ys; 0554 // Real part 0555 find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, (intptr_t)2*My, 0556 (float*)coefs+coffset, (intptr_t)2*Ny*ys); 0557 // Imag part 0558 find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, (intptr_t)2*My, 0559 ((float*)coefs)+coffset+1, (intptr_t)2*Ny*ys); 0560 } 0561 0562 // Now, solve in the Y-direction 0563 for (int ix=0; ix<Nx; ix++) { 0564 intptr_t doffset = (2*ix*Ny)*ys; 0565 intptr_t coffset = (2*ix*Ny)*ys; 0566 // Real part 0567 find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)coefs)+doffset, 0568 (intptr_t)2*ys, ((float*)coefs)+coffset, (intptr_t)2*ys); 0569 // Imag part 0570 find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)coefs)+doffset+1, 0571 (intptr_t)2*ys, ((float*)coefs)+coffset+1, (intptr_t)2*ys); 0572 } 0573 } 0574 0575 multi_UBspline_3d_c* 0576 create_multi_UBspline_3d_c (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, 0577 BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, 0578 int num_splines) 0579 { 0580 // Create new spline 0581 multi_UBspline_3d_c* restrict spline = static_cast<multi_UBspline_3d_c*>(malloc(sizeof(multi_UBspline_3d_c))); 0582 if (!spline) { 0583 fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_3d_c.\n"); 0584 abort(); 0585 } 0586 spline->spcode = MULTI_U3D; 0587 spline->tcode = SINGLE_COMPLEX; 0588 spline->xBC = xBC; 0589 spline->yBC = yBC; 0590 spline->zBC = zBC; 0591 spline->num_splines = num_splines; 0592 0593 // Setup internal variables 0594 int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; 0595 int Nx, Ny, Nz; 0596 0597 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) 0598 Nx = Mx+3; 0599 else 0600 Nx = Mx+2; 0601 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 0602 x_grid.delta_inv = 1.0/x_grid.delta; 0603 spline->x_grid = x_grid; 0604 0605 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) 0606 Ny = My+3; 0607 else 0608 Ny = My+2; 0609 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 0610 y_grid.delta_inv = 1.0/y_grid.delta; 0611 spline->y_grid = y_grid; 0612 0613 if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) 0614 Nz = Mz+3; 0615 else 0616 Nz = Mz+2; 0617 z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); 0618 z_grid.delta_inv = 1.0/z_grid.delta; 0619 spline->z_grid = z_grid; 0620 0621 int N = spline->num_splines; 0622 #ifdef HAVE_SSE 0623 if (N % 2) 0624 N++; 0625 #endif 0626 0627 spline->x_stride = Ny*Nz*N; 0628 spline->y_stride = Nz*N; 0629 spline->z_stride = N; 0630 0631 #ifndef HAVE_POSIX_MEMALIGN 0632 spline->coefs = (complex_float*)malloc (2*sizeof(float)*Nx*Ny*Nz*N); 0633 spline->lapl3 = (complex_float*)malloc (6*sizeof(float)*N); 0634 #else 0635 posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(float)*Nx*Ny*Nz*N); 0636 posix_memalign ((void**)&spline->lapl3, 64, 6*sizeof(float)*N); 0637 #endif 0638 #ifdef HAVE_SSE 0639 init_sse_data(); 0640 #endif 0641 if (!spline->coefs || !spline->lapl3) { 0642 fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_3d_c.\n"); 0643 abort(); 0644 } 0645 0646 return spline; 0647 } 0648 0649 void 0650 set_multi_UBspline_3d_c (multi_UBspline_3d_c* spline, int num, complex_float *data) 0651 { 0652 // Setup internal variables 0653 int Mx = spline->x_grid.num; 0654 int My = spline->y_grid.num; 0655 int Mz = spline->z_grid.num; 0656 int Nx, Ny, Nz; 0657 0658 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 0659 Nx = Mx+3; 0660 else 0661 Nx = Mx+2; 0662 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) 0663 Ny = My+3; 0664 else 0665 Ny = My+2; 0666 if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) 0667 Nz = Mz+3; 0668 else 0669 Nz = Mz+2; 0670 0671 BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; 0672 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; 0673 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; 0674 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; 0675 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; 0676 yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; 0677 yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; 0678 yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; 0679 yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; 0680 zBC_r.lCode = spline->zBC.lCode; zBC_r.rCode = spline->zBC.rCode; 0681 zBC_r.lVal = spline->zBC.lVal_r; zBC_r.rVal = spline->zBC.rVal_r; 0682 zBC_i.lCode = spline->zBC.lCode; zBC_i.rCode = spline->zBC.rCode; 0683 zBC_i.lVal = spline->zBC.lVal_i; zBC_i.rVal = spline->zBC.rVal_i; 0684 0685 complex_float *coefs = spline->coefs + num; 0686 int zs = spline->z_stride; 0687 // First, solve in the X-direction 0688 for (int iy=0; iy<My; iy++) 0689 for (int iz=0; iz<Mz; iz++) { 0690 intptr_t doffset = 2*(iy*Mz+iz); 0691 intptr_t coffset = 2*(iy*Nz+iz)*zs; 0692 // Real part 0693 find_coefs_1d_s (spline->x_grid, xBC_r, 0694 ((float*)data)+doffset, (intptr_t)2*My*Mz, 0695 ((float*)coefs)+coffset, (intptr_t)2*Ny*Nz*zs); 0696 // Imag part 0697 find_coefs_1d_s (spline->x_grid, xBC_i, 0698 ((float*)data)+doffset+1, (intptr_t)2*My*Mz, 0699 ((float*)coefs)+coffset+1, (intptr_t)2*Ny*Nz*zs); 0700 } 0701 0702 // Now, solve in the Y-direction 0703 for (int ix=0; ix<Nx; ix++) 0704 for (int iz=0; iz<Nz; iz++) { 0705 intptr_t doffset = 2*(ix*Ny*Nz + iz)*zs; 0706 intptr_t coffset = 2*(ix*Ny*Nz + iz)*zs; 0707 // Real part 0708 find_coefs_1d_s (spline->y_grid, yBC_r, 0709 ((float*)coefs)+doffset, (intptr_t)2*Nz*zs, 0710 ((float*)coefs)+coffset, (intptr_t)2*Nz*zs); 0711 // Imag part 0712 find_coefs_1d_s (spline->y_grid, yBC_i, 0713 ((float*)coefs)+doffset+1, (intptr_t)2*Nz*zs, 0714 ((float*)coefs)+coffset+1, (intptr_t)2*Nz*zs); 0715 } 0716 0717 // Now, solve in the Z-direction 0718 for (int ix=0; ix<Nx; ix++) 0719 for (int iy=0; iy<Ny; iy++) { 0720 intptr_t doffset = 2*((ix*Ny+iy)*Nz)*zs; 0721 intptr_t coffset = 2*((ix*Ny+iy)*Nz)*zs; 0722 // Real part 0723 find_coefs_1d_s (spline->z_grid, zBC_r, 0724 ((float*)coefs)+doffset, (intptr_t)2*zs, 0725 ((float*)coefs)+coffset, (intptr_t)2*zs); 0726 // Imag part 0727 find_coefs_1d_s (spline->z_grid, zBC_i, 0728 ((float*)coefs)+doffset+1, (intptr_t)2*zs, 0729 ((float*)coefs)+coffset+1, (intptr_t)2*zs); 0730 } 0731 } 0732 0733 0734 //////////////////////////////////////////////////////////// 0735 //////////////////////////////////////////////////////////// 0736 //// Double-Precision, Real Creation Routines //// 0737 //////////////////////////////////////////////////////////// 0738 //////////////////////////////////////////////////////////// 0739 0740 // On input, bands should be filled with: 0741 // row 0 : abcdInitial from boundary conditions 0742 // rows 1:M: basis functions in first 3 cols, data in last 0743 // row M+1 : abcdFinal from boundary conditions 0744 // cstride gives the stride between values in coefs. 0745 // On exit, coefs with contain interpolating B-spline coefs 0746 void 0747 solve_deriv_interp_1d_d (double bands[], double coefs[], 0748 int M, int cstride); 0749 0750 // On input, bands should be filled with: 0751 // row 0 : abcdInitial from boundary conditions 0752 // rows 1:M: basis functions in first 3 cols, data in last 0753 // row M+1 : abcdFinal from boundary conditions 0754 // cstride gives the stride between values in coefs. 0755 // On exit, coefs with contain interpolating B-spline coefs 0756 void 0757 solve_periodic_interp_1d_d (double bands[], double coefs[], 0758 int M, int cstride); 0759 0760 void 0761 find_coefs_1d_d (Ugrid grid, BCtype_d bc, 0762 double *data, intptr_t dstride, 0763 double *coefs, intptr_t cstride); 0764 0765 multi_UBspline_1d_d* 0766 create_multi_UBspline_1d_d (Ugrid x_grid, BCtype_d xBC, int num_splines) 0767 { 0768 // Create new spline 0769 multi_UBspline_1d_d* restrict spline = static_cast<multi_UBspline_1d_d*>(malloc(sizeof(multi_UBspline_1d_d))); 0770 if (!spline) { 0771 fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_1d_d.\n"); 0772 abort(); 0773 } 0774 spline->spcode = MULTI_U1D; 0775 spline->tcode = DOUBLE_REAL; 0776 spline->xBC = xBC; 0777 spline->num_splines = num_splines; 0778 0779 // Setup internal variables 0780 int Mx = x_grid.num; 0781 int Nx; 0782 0783 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { 0784 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); 0785 Nx = Mx+3; 0786 } 0787 else { 0788 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); 0789 Nx = Mx+2; 0790 } 0791 0792 x_grid.delta_inv = 1.0/x_grid.delta; 0793 spline->x_grid = x_grid; 0794 0795 int N = num_splines; 0796 #ifdef HAVE_SSE2 0797 // We must pad to keep data aligned for SSE operations 0798 if (N & 1) 0799 N++; 0800 #endif 0801 spline->x_stride = N; 0802 0803 #ifndef HAVE_POSIX_MEMALIGN 0804 spline->coefs = (double*)malloc (sizeof(double)*Nx*N); 0805 0806 #else 0807 posix_memalign ((void**)&spline->coefs, 64, sizeof(double)*Nx*N); 0808 #endif 0809 #ifdef HAVE_SSE2 0810 init_sse_data(); 0811 #endif 0812 if (!spline->coefs) { 0813 fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_1d_d.\n"); 0814 abort(); 0815 } 0816 0817 return spline; 0818 } 0819 0820 void 0821 set_multi_UBspline_1d_d (multi_UBspline_1d_d* spline, int num, double *data) 0822 { 0823 double *coefs = spline->coefs + num; 0824 int xs = spline->x_stride; 0825 find_coefs_1d_d (spline->x_grid, spline->xBC, data, 1, coefs, xs); 0826 } 0827 0828 void 0829 set_multi_UBspline_1d_d_BC (multi_UBspline_1d_d* spline, int num, double *data, 0830 BCtype_d xBC) 0831 { 0832 double *coefs = spline->coefs + num; 0833 int xs = spline->x_stride; 0834 find_coefs_1d_d (spline->x_grid, xBC, data, 1, coefs, xs); 0835 } 0836 0837 0838 multi_UBspline_2d_d* 0839 create_multi_UBspline_2d_d (Ugrid x_grid, Ugrid y_grid, 0840 BCtype_d xBC, BCtype_d yBC, int num_splines) 0841 { 0842 // Create new spline 0843 multi_UBspline_2d_d* restrict spline = static_cast<multi_UBspline_2d_d*>(malloc(sizeof(multi_UBspline_2d_d))); 0844 if (!spline) { 0845 fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_2d_d.\n"); 0846 abort(); 0847 } 0848 spline->spcode = MULTI_U2D; 0849 spline->tcode = DOUBLE_REAL; 0850 spline->xBC = xBC; 0851 spline->yBC = yBC; 0852 spline->num_splines = num_splines; 0853 0854 // Setup internal variables 0855 int Mx = x_grid.num; 0856 int My = y_grid.num; 0857 int Nx, Ny; 0858 0859 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) 0860 Nx = Mx+3; 0861 else 0862 Nx = Mx+2; 0863 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 0864 x_grid.delta_inv = 1.0/x_grid.delta; 0865 spline->x_grid = x_grid; 0866 0867 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) 0868 Ny = My+3; 0869 else 0870 Ny = My+2; 0871 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 0872 y_grid.delta_inv = 1.0/y_grid.delta; 0873 spline->y_grid = y_grid; 0874 0875 int N = num_splines; 0876 0877 #ifdef HAVE_SSE2 0878 // We must pad to keep data align for SSE operations 0879 if (num_splines & 1) 0880 N++; 0881 #endif 0882 spline->x_stride = Ny*N; 0883 spline->y_stride = N; 0884 0885 #ifndef HAVE_POSIX_MEMALIGN 0886 spline->coefs = (double*)malloc (sizeof(double)*Nx*Ny*N); 0887 #else 0888 posix_memalign ((void**)&spline->coefs, 64, (sizeof(double)*Nx*Ny*N)); 0889 #endif 0890 #ifdef HAVE_SSE2 0891 init_sse_data(); 0892 #endif 0893 if (!spline->coefs) { 0894 fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_2d_d.\n"); 0895 abort(); 0896 } 0897 0898 return spline; 0899 } 0900 0901 void 0902 set_multi_UBspline_2d_d (multi_UBspline_2d_d* spline, int num, double *data) 0903 { 0904 int Mx = spline->x_grid.num; 0905 int My = spline->y_grid.num; 0906 int Nx, Ny; 0907 double *coefs = spline->coefs + num; 0908 0909 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 0910 Nx = Mx+3; 0911 else 0912 Nx = Mx+2; 0913 0914 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) 0915 Ny = My+3; 0916 else 0917 Ny = My+2; 0918 0919 int ys = spline->y_stride; 0920 // First, solve in the X-direction 0921 for (int iy=0; iy<My; iy++) { 0922 intptr_t doffset = iy; 0923 intptr_t coffset = iy*ys; 0924 find_coefs_1d_d (spline->x_grid, spline->xBC, 0925 data+doffset, (intptr_t)My, 0926 coefs+coffset, (intptr_t)Ny*ys); 0927 } 0928 0929 // Now, solve in the Y-direction 0930 for (int ix=0; ix<Nx; ix++) { 0931 intptr_t doffset = ix*Ny*ys; 0932 intptr_t coffset = ix*Ny*ys; 0933 find_coefs_1d_d (spline->y_grid, spline->yBC, 0934 coefs+doffset, (intptr_t)ys, 0935 coefs+coffset, (intptr_t)ys); 0936 } 0937 } 0938 0939 0940 multi_UBspline_3d_d* 0941 create_multi_UBspline_3d_d (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, 0942 BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, 0943 int num_splines) 0944 { 0945 // Create new spline 0946 multi_UBspline_3d_d* restrict spline; 0947 #ifdef HAVE_POSIX_MEMALIGN 0948 posix_memalign ((void**)&spline, 64, (size_t)sizeof(multi_UBspline_3d_d)); 0949 #else 0950 spline = static_cast<multi_UBspline_3d_d*>(malloc(sizeof(multi_UBspline_3d_d))); 0951 #endif 0952 if (!spline) { 0953 fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_3d_d.\n"); 0954 abort(); 0955 } 0956 spline->spcode = MULTI_U3D; 0957 spline->tcode = DOUBLE_REAL; 0958 spline->xBC = xBC; 0959 spline->yBC = yBC; 0960 spline->zBC = zBC; 0961 spline->num_splines = num_splines; 0962 0963 // Setup internal variables 0964 int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; 0965 int Nx, Ny, Nz; 0966 0967 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) 0968 Nx = Mx+3; 0969 else 0970 Nx = Mx+2; 0971 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 0972 x_grid.delta_inv = 1.0/x_grid.delta; 0973 spline->x_grid = x_grid; 0974 0975 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) 0976 Ny = My+3; 0977 else 0978 Ny = My+2; 0979 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 0980 y_grid.delta_inv = 1.0/y_grid.delta; 0981 spline->y_grid = y_grid; 0982 0983 if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) 0984 Nz = Mz+3; 0985 else 0986 Nz = Mz+2; 0987 z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); 0988 z_grid.delta_inv = 1.0/z_grid.delta; 0989 spline->z_grid = z_grid; 0990 0991 0992 int N = num_splines; 0993 #if defined HAVE_SSE2 || defined HAVE_VSX 0994 // We must pad to keep data align for SSE operations 0995 if (N & 1) 0996 N++; 0997 #endif 0998 0999 spline->x_stride = Ny*Nz*N; 1000 spline->y_stride = Nz*N; 1001 spline->z_stride = N; 1002 1003 #ifdef HAVE_POSIX_MEMALIGN 1004 posix_memalign ((void**)&spline->coefs, 64, ((size_t)sizeof(double)*Nx*Ny*Nz*N)); 1005 #else 1006 spline->coefs = new double[Nx*Ny*Nz*N]; 1007 #endif 1008 #ifdef HAVE_SSE2 1009 init_sse_data(); 1010 #endif 1011 if (!spline->coefs) { 1012 fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_3d_d.\n"); 1013 abort(); 1014 } 1015 1016 return spline; 1017 } 1018 1019 void 1020 set_multi_UBspline_3d_d (multi_UBspline_3d_d* spline, int num, double *data) 1021 { 1022 int Mx = spline->x_grid.num; 1023 int My = spline->y_grid.num; 1024 int Mz = spline->z_grid.num; 1025 int Nx, Ny, Nz; 1026 1027 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 1028 Nx = Mx+3; 1029 else 1030 Nx = Mx+2; 1031 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) 1032 Ny = My+3; 1033 else 1034 Ny = My+2; 1035 if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) 1036 Nz = Mz+3; 1037 else 1038 Nz = Mz+2; 1039 1040 double *coefs = spline->coefs + num; 1041 intptr_t zs = spline->z_stride; 1042 1043 // First, solve in the X-direction 1044 for (int iy=0; iy<My; iy++) 1045 for (int iz=0; iz<Mz; iz++) { 1046 intptr_t doffset = iy*Mz+iz; 1047 intptr_t coffset = (iy*Nz+iz)*zs; 1048 find_coefs_1d_d (spline->x_grid, spline->xBC, 1049 data+doffset, (intptr_t)My*Mz, 1050 coefs+coffset, (intptr_t)Ny*Nz*zs); 1051 } 1052 1053 // Now, solve in the Y-direction 1054 for (int ix=0; ix<Nx; ix++) 1055 for (int iz=0; iz<Nz; iz++) { 1056 intptr_t doffset = (ix*Ny*Nz + iz)*zs; 1057 intptr_t coffset = (ix*Ny*Nz + iz)*zs; 1058 find_coefs_1d_d (spline->y_grid, spline->yBC, 1059 coefs+doffset, (intptr_t)Nz*zs, 1060 coefs+coffset, (intptr_t)Nz*zs); 1061 } 1062 1063 // Now, solve in the Z-direction 1064 for (int ix=0; ix<Nx; ix++) 1065 for (int iy=0; iy<Ny; iy++) { 1066 intptr_t doffset = (ix*Ny+iy)*Nz*zs; 1067 intptr_t coffset = (ix*Ny+iy)*Nz*zs; 1068 find_coefs_1d_d (spline->z_grid, spline->zBC, 1069 coefs+doffset, (intptr_t)zs, 1070 coefs+coffset, (intptr_t)zs); 1071 } 1072 } 1073 1074 1075 //////////////////////////////////////////////////////////// 1076 //////////////////////////////////////////////////////////// 1077 //// Double-Precision, Complex Creation Routines //// 1078 //////////////////////////////////////////////////////////// 1079 //////////////////////////////////////////////////////////// 1080 1081 // On input, bands should be filled with: 1082 // row 0 : abcdInitial from boundary conditions 1083 // rows 1:M: basis functions in first 3 cols, data in last 1084 // row M+1 : abcdFinal from boundary conditions 1085 // cstride gives the stride between values in coefs. 1086 // On exit, coefs with contain interpolating B-spline coefs 1087 1088 1089 multi_UBspline_1d_z* 1090 create_multi_UBspline_1d_z (Ugrid x_grid, BCtype_z xBC, int num_splines) 1091 { 1092 // Create new spline 1093 multi_UBspline_1d_z* restrict spline = static_cast<multi_UBspline_1d_z*>(malloc(sizeof(multi_UBspline_1d_z))); 1094 if (!spline) { 1095 fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_1d_z.\n"); 1096 abort(); 1097 } 1098 spline->spcode = MULTI_U1D; 1099 spline->tcode = DOUBLE_COMPLEX; 1100 spline->xBC = xBC; 1101 spline->num_splines = num_splines; 1102 1103 // Setup internal variables 1104 int Mx = x_grid.num; 1105 int Nx; 1106 1107 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { 1108 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); 1109 Nx = Mx+3; 1110 } 1111 else { 1112 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); 1113 Nx = Mx+2; 1114 } 1115 1116 x_grid.delta_inv = 1.0/x_grid.delta; 1117 spline->x_grid = x_grid; 1118 spline->x_stride = num_splines; 1119 1120 #ifndef HAVE_POSIX_MEMALIGN 1121 spline->coefs = (complex_double*)malloc (2*sizeof(double)*Nx*num_splines); 1122 #else 1123 posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(double)*Nx*num_splines); 1124 #endif 1125 #ifdef HAVE_SSE2 1126 init_sse_data(); 1127 #endif 1128 if (!spline->coefs) { 1129 fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_1d_z.\n"); 1130 abort(); 1131 } 1132 1133 return spline; 1134 } 1135 1136 void 1137 set_multi_UBspline_1d_z (multi_UBspline_1d_z* spline, int num, complex_double *data) 1138 { 1139 // int Mx = spline->x_grid.num; 1140 // Set but not used 1141 // int Nx; 1142 1143 complex_double *coefs = spline->coefs + num; 1144 1145 // if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 1146 // Nx = Mx+3; 1147 // else 1148 // Nx = Mx+2; 1149 1150 BCtype_d xBC_r, xBC_i; 1151 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; 1152 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; 1153 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; 1154 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; 1155 int xs = spline->x_stride; 1156 // Real part 1157 find_coefs_1d_d (spline->x_grid, xBC_r, 1158 (double*)data, (intptr_t)2, 1159 ((double*)coefs), (intptr_t)2*xs); 1160 // Imaginary part 1161 find_coefs_1d_d (spline->x_grid, xBC_i, 1162 ((double*)data)+1, (intptr_t)2, 1163 ((double*)coefs)+1, (intptr_t)2*xs); 1164 1165 } 1166 1167 void 1168 set_multi_UBspline_1d_z_BC (multi_UBspline_1d_z *spline, int num, 1169 complex_double *data, BCtype_z xBC) 1170 { 1171 // int Mx = spline->x_grid.num; 1172 // int Nx; 1173 1174 complex_double *coefs = spline->coefs + num; 1175 1176 // if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) 1177 // Nx = Mx+3; 1178 // else 1179 // Nx = Mx+2; 1180 1181 BCtype_d xBC_r, xBC_i; 1182 xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; 1183 xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; 1184 xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; 1185 xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; 1186 int xs = spline->x_stride; 1187 // Real part 1188 find_coefs_1d_d (spline->x_grid, xBC_r, 1189 (double*)data, (intptr_t)2, 1190 ((double*)coefs), (intptr_t)2*xs); 1191 // Imaginary part 1192 find_coefs_1d_d (spline->x_grid, xBC_i, 1193 ((double*)data)+1, (intptr_t)2, 1194 ((double*)coefs)+1, (intptr_t)2*xs); 1195 } 1196 1197 1198 multi_UBspline_2d_z* 1199 create_multi_UBspline_2d_z (Ugrid x_grid, Ugrid y_grid, 1200 BCtype_z xBC, BCtype_z yBC, int num_splines) 1201 { 1202 // Create new spline 1203 multi_UBspline_2d_z* restrict spline = static_cast<multi_UBspline_2d_z*>(malloc(sizeof(multi_UBspline_2d_z))); 1204 if (!spline) { 1205 fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_2d_z.\n"); 1206 abort(); 1207 } 1208 spline->spcode = MULTI_U2D; 1209 spline->tcode = DOUBLE_COMPLEX; 1210 spline->xBC = xBC; 1211 spline->yBC = yBC; 1212 spline->num_splines = num_splines; 1213 1214 // Setup internal variables 1215 int Mx = x_grid.num; 1216 int My = y_grid.num; 1217 int Nx, Ny; 1218 1219 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) 1220 Nx = Mx+3; 1221 else 1222 Nx = Mx+2; 1223 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 1224 x_grid.delta_inv = 1.0/x_grid.delta; 1225 spline->x_grid = x_grid; 1226 1227 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) 1228 Ny = My+3; 1229 else 1230 Ny = My+2; 1231 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 1232 y_grid.delta_inv = 1.0/y_grid.delta; 1233 spline->y_grid = y_grid; 1234 spline->x_stride = Ny*num_splines; 1235 spline->y_stride = num_splines; 1236 1237 #ifndef HAVE_POSIX_MEMALIGN 1238 spline->coefs = (complex_double*)malloc (2*sizeof(double)*Nx*Ny*num_splines); 1239 spline->lapl2 = (complex_double*)malloc (4*sizeof(double)*num_splines); 1240 #else 1241 posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(double)*Nx*Ny*num_splines); 1242 posix_memalign ((void**)&spline->lapl2, 64, 4*sizeof(double)*num_splines); 1243 #endif 1244 #ifdef HAVE_SSE2 1245 init_sse_data(); 1246 #endif 1247 if (!spline->coefs || !spline->lapl2) { 1248 fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_2d_z.\n"); 1249 abort(); 1250 } 1251 1252 return spline; 1253 } 1254 1255 1256 void 1257 set_multi_UBspline_2d_z (multi_UBspline_2d_z* spline, int num, 1258 complex_double *data) 1259 { 1260 int Mx = spline->x_grid.num; 1261 int My = spline->y_grid.num; 1262 int Nx, Ny; 1263 1264 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 1265 Nx = Mx+3; 1266 else 1267 Nx = Mx+2; 1268 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) 1269 Ny = My+3; 1270 else 1271 Ny = My+2; 1272 1273 BCtype_d xBC_r, xBC_i, yBC_r, yBC_i; 1274 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; 1275 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; 1276 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; 1277 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; 1278 yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; 1279 yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; 1280 yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; 1281 yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; 1282 1283 complex_double *coefs = spline->coefs + num; 1284 int ys = spline->y_stride; 1285 1286 // First, solve in the X-direction 1287 for (int iy=0; iy<My; iy++) { 1288 intptr_t doffset = 2*iy; 1289 intptr_t coffset = 2*iy*ys; 1290 // Real part 1291 find_coefs_1d_d (spline->x_grid, xBC_r, 1292 ((double*)data+doffset), (intptr_t)2*My, 1293 (double*)coefs+coffset, (intptr_t)2*Ny*ys); 1294 // Imag part 1295 find_coefs_1d_d (spline->x_grid, xBC_i, 1296 ((double*)data)+doffset+1, (intptr_t)2*My, 1297 ((double*)coefs)+coffset+1, (intptr_t)2*Ny*ys); 1298 } 1299 1300 // Now, solve in the Y-direction 1301 for (int ix=0; ix<Nx; ix++) { 1302 intptr_t doffset = 2*ix*Ny*ys; 1303 intptr_t coffset = 2*ix*Ny*ys; 1304 // Real part 1305 find_coefs_1d_d (spline->y_grid, yBC_r, 1306 ((double*)coefs)+doffset, (intptr_t)2*ys, 1307 (double*)coefs+coffset, (intptr_t)2*ys); 1308 // Imag part 1309 find_coefs_1d_d (spline->y_grid, yBC_i, 1310 (double*)coefs+doffset+1, (intptr_t)2*ys, 1311 ((double*)coefs)+coffset+1, (intptr_t)2*ys); 1312 } 1313 } 1314 1315 1316 1317 multi_UBspline_3d_z* 1318 create_multi_UBspline_3d_z (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, 1319 BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, 1320 int num_splines) 1321 { 1322 // Create new spline 1323 multi_UBspline_3d_z* restrict spline = static_cast<multi_UBspline_3d_z*>(malloc(sizeof(multi_UBspline_3d_z))); 1324 if (!spline) { 1325 fprintf (stderr, "Out of memory allocating spline in create_multi_UBspline_3d_z.\n"); 1326 abort(); 1327 } 1328 spline->spcode = MULTI_U3D; 1329 spline->tcode = DOUBLE_COMPLEX; 1330 spline->xBC = xBC; 1331 spline->yBC = yBC; 1332 spline->zBC = zBC; 1333 spline->num_splines = num_splines; 1334 1335 // Setup internal variables 1336 int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; 1337 int Nx, Ny, Nz; 1338 1339 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) 1340 Nx = Mx+3; 1341 else 1342 Nx = Mx+2; 1343 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); 1344 x_grid.delta_inv = 1.0/x_grid.delta; 1345 spline->x_grid = x_grid; 1346 1347 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) 1348 Ny = My+3; 1349 else 1350 Ny = My+2; 1351 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 1352 y_grid.delta_inv = 1.0/y_grid.delta; 1353 spline->y_grid = y_grid; 1354 1355 if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) 1356 Nz = Mz+3; 1357 else 1358 Nz = Mz+2; 1359 z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); 1360 z_grid.delta_inv = 1.0/z_grid.delta; 1361 spline->z_grid = z_grid; 1362 1363 int N = num_splines; 1364 #ifdef HAVE_SSE2 1365 if (N & 3) 1366 N += 4-(N & 3); 1367 #endif 1368 1369 spline->x_stride = (intptr_t)Ny*(intptr_t)Nz*N; 1370 spline->y_stride = Nz*N; 1371 spline->z_stride = N; 1372 1373 #ifndef HAVE_POSIX_MEMALIGN 1374 spline->coefs = new complex_double[Nx*Ny*Nz*N]; 1375 spline->lapl3 = new complex_double[3*N]; 1376 #else 1377 posix_memalign ((void**)&spline->coefs, 64, (size_t)2*sizeof(double)*Nx*Ny*Nz*N); 1378 posix_memalign ((void**)&spline->lapl3, 64, 6*sizeof(double)*N); 1379 #endif 1380 1381 #ifdef HAVE_SSE2 1382 init_sse_data(); 1383 #endif 1384 if (!spline->coefs || !spline->lapl3) { 1385 fprintf (stderr, "Out of memory allocating spline coefficients in create_multi_UBspline_3d_z.\n"); 1386 abort(); 1387 } 1388 1389 return spline; 1390 } 1391 1392 void 1393 set_multi_UBspline_3d_z (multi_UBspline_3d_z* spline, int num, complex_double *data) 1394 { 1395 // Setup internal variables 1396 int Mx = spline->x_grid.num; 1397 int My = spline->y_grid.num; 1398 int Mz = spline->z_grid.num; 1399 int Nx, Ny, Nz; 1400 1401 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) 1402 Nx = Mx+3; 1403 else 1404 Nx = Mx+2; 1405 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) 1406 Ny = My+3; 1407 else 1408 Ny = My+2; 1409 if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) 1410 Nz = Mz+3; 1411 else 1412 Nz = Mz+2; 1413 1414 BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; 1415 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; 1416 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; 1417 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; 1418 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; 1419 yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; 1420 yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; 1421 yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; 1422 yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; 1423 zBC_r.lCode = spline->zBC.lCode; zBC_r.rCode = spline->zBC.rCode; 1424 zBC_r.lVal = spline->zBC.lVal_r; zBC_r.rVal = spline->zBC.rVal_r; 1425 zBC_i.lCode = spline->zBC.lCode; zBC_i.rCode = spline->zBC.rCode; 1426 zBC_i.lVal = spline->zBC.lVal_i; zBC_i.rVal = spline->zBC.rVal_i; 1427 1428 complex_double *coefs = spline->coefs + num; 1429 1430 // unused variable 1431 // int N = spline->num_splines; 1432 int zs = spline->z_stride; 1433 // First, solve in the X-direction 1434 for (int iy=0; iy<My; iy++) 1435 for (int iz=0; iz<Mz; iz++) { 1436 intptr_t doffset = 2*(iy*Mz+iz); 1437 intptr_t coffset = 2*(iy*Nz+iz)*zs; 1438 // Real part 1439 find_coefs_1d_d (spline->x_grid, xBC_r, 1440 ((double*)data)+doffset, (intptr_t)2*My*Mz, 1441 ((double*)coefs)+coffset, (intptr_t)2*Ny*Nz*zs); 1442 // Imag part 1443 find_coefs_1d_d (spline->x_grid, xBC_i, 1444 ((double*)data)+doffset+1, (intptr_t)2*My*Mz, 1445 ((double*)coefs)+coffset+1, (intptr_t)2*Ny*Nz*zs); 1446 } 1447 1448 // Now, solve in the Y-direction 1449 for (int ix=0; ix<Nx; ix++) 1450 for (int iz=0; iz<Nz; iz++) { 1451 intptr_t doffset = 2*(ix*Ny*Nz + iz)*zs; 1452 intptr_t coffset = 2*(ix*Ny*Nz + iz)*zs; 1453 // Real part 1454 find_coefs_1d_d (spline->y_grid, yBC_r, 1455 ((double*)coefs)+doffset, (intptr_t)2*Nz*zs, 1456 ((double*)coefs)+coffset, (intptr_t)2*Nz*zs); 1457 // Imag part 1458 find_coefs_1d_d (spline->y_grid, yBC_i, 1459 ((double*)coefs)+doffset+1, (intptr_t)2*Nz*zs, 1460 ((double*)coefs)+coffset+1, (intptr_t)2*Nz*zs); 1461 } 1462 1463 // Now, solve in the Z-direction 1464 for (int ix=0; ix<Nx; ix++) 1465 for (int iy=0; iy<Ny; iy++) { 1466 intptr_t doffset = 2*((ix*Ny+iy)*Nz)*zs; 1467 intptr_t coffset = 2*((ix*Ny+iy)*Nz)*zs; 1468 // Real part 1469 find_coefs_1d_d (spline->z_grid, zBC_r, 1470 ((double*)coefs)+doffset, (intptr_t)2*zs, 1471 ((double*)coefs)+coffset, (intptr_t)2*zs); 1472 // Imag part 1473 find_coefs_1d_d (spline->z_grid, zBC_i, 1474 ((double*)coefs)+doffset+1, (intptr_t)2*zs, 1475 ((double*)coefs)+coffset+1, (intptr_t)2*zs); 1476 } 1477 } 1478 1479 1480 void 1481 destroy_multi_UBspline (Bspline *spline) 1482 { 1483 free(spline->coefs); 1484 free(spline); 1485 }