File indexing completed on 2024-12-22 04:09:52
0001 ///////////////////////////////////////////////////////////////////////////// 0002 // einspline: a library for creating and evaluating B-splines // 0003 // Copyright (C) 2007 Kenneth P. Esler, Jr. // 0004 // // 0005 // This program is free software; you can redistribute it and/or modify // 0006 // it under the terms of the GNU General Public License as published by // 0007 // the Free Software Foundation; either version 2 of the License, or // 0008 // (at your option) any later version. // 0009 // // 0010 // This program is distributed in the hope that it will be useful, // 0011 // but WITHOUT ANY WARRANTY; without even the implied warranty of // 0012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // 0013 // GNU General Public License for more details. // 0014 // // 0015 // You should have received a copy of the GNU General Public License // 0016 // along with this program; if not, write to the Free Software // 0017 // Foundation, Inc., 51 Franklin Street, Fifth Floor, // 0018 // Boston, MA 02110-1301 USA // 0019 ///////////////////////////////////////////////////////////////////////////// 0020 0021 #include "nubasis.h" 0022 #include <stdlib.h> 0023 0024 0025 0026 NUBasis* 0027 create_NUBasis (NUgrid *grid, bool periodic) 0028 { 0029 NUBasis* restrict basis = new NUBasis; 0030 basis->grid = grid; 0031 basis->periodic = periodic; 0032 int N = grid->num_points; 0033 basis->xVals = new double[N+5]; 0034 basis->dxInv = new double[3*(N+2)]; 0035 for (int i=0; i<N; i++) 0036 basis->xVals[i+2] = grid->points[i]; 0037 double* restrict g = grid->points; 0038 // Extend grid points on either end to provide enough points to 0039 // construct a full basis set 0040 if (!periodic) { 0041 basis->xVals[0] = g[ 0 ] - 2.0*(g[1]-g[0]); 0042 basis->xVals[1] = g[ 0 ] - 1.0*(g[1]-g[0]); 0043 basis->xVals[N+2] = g[N-1] + 1.0*(g[N-1]-g[N-2]); 0044 basis->xVals[N+3] = g[N-1] + 2.0*(g[N-1]-g[N-2]); 0045 basis->xVals[N+4] = g[N-1] + 3.0*(g[N-1]-g[N-2]); 0046 } 0047 else { 0048 basis->xVals[1] = g[ 0 ] - (g[N-1] - g[N-2]); 0049 basis->xVals[0] = g[ 0 ] - (g[N-1] - g[N-3]); 0050 basis->xVals[N+2] = g[N-1] + (g[ 1 ] - g[ 0 ]); 0051 basis->xVals[N+3] = g[N-1] + (g[ 2 ] - g[ 0 ]); 0052 basis->xVals[N+4] = g[N-1] + (g[ 3 ] - g[ 0 ]); 0053 } 0054 for (int i=0; i<N+2; i++) 0055 for (int j=0; j<3; j++) 0056 basis->dxInv[3*i+j] = 0057 1.0/(basis->xVals[i+j+1]-basis->xVals[i]); 0058 return basis; 0059 } 0060 0061 void 0062 destroy_NUBasis (NUBasis *basis) 0063 { 0064 delete[] (basis->xVals); 0065 delete[] (basis->dxInv); 0066 delete (basis); 0067 } 0068 0069 0070 int 0071 get_NUBasis_funcs_s (NUBasis* restrict basis, double x, 0072 float bfuncs[4]) 0073 { 0074 double b1[2], b2[3]; 0075 int i = (*basis->grid->reverse_map)(basis->grid, x); 0076 int i2 = i+2; 0077 double* restrict dxInv = basis->dxInv; 0078 double* restrict xVals = basis->xVals; 0079 0080 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0081 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0082 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0083 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0084 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0085 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0086 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0087 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0088 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0089 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0090 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0091 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0092 return i; 0093 } 0094 0095 0096 void 0097 get_NUBasis_funcs_si (NUBasis* restrict basis, int i, 0098 float bfuncs[4]) 0099 { 0100 int i2 = i+2; 0101 double b1[2], b2[3]; 0102 double x = basis->grid->points[i]; 0103 double* restrict dxInv = basis->dxInv; 0104 double* restrict xVals = basis->xVals; 0105 0106 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0107 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0108 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0109 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0110 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0111 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0112 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0113 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0114 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0115 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0116 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0117 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0118 } 0119 0120 int 0121 get_NUBasis_dfuncs_s (NUBasis* restrict basis, double x, 0122 float bfuncs[4], float dbfuncs[4]) 0123 { 0124 double b1[2], b2[3]; 0125 int i = (*basis->grid->reverse_map)(basis->grid, x); 0126 int i2 = i+2; 0127 double* restrict dxInv = basis->dxInv; 0128 double* restrict xVals = basis->xVals; 0129 0130 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0131 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0132 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0133 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0134 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0135 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0136 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0137 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0138 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0139 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0140 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0141 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0142 0143 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]); 0144 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); 0145 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); 0146 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]); 0147 0148 return i; 0149 } 0150 0151 0152 void 0153 get_NUBasis_dfuncs_si (NUBasis* restrict basis, int i, 0154 float bfuncs[4], float dbfuncs[4]) 0155 { 0156 double b1[2], b2[3]; 0157 double x = basis->grid->points[i]; 0158 int i2 = i+2; 0159 double* restrict dxInv = basis->dxInv; 0160 double* restrict xVals = basis->xVals; 0161 0162 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0163 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0164 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0165 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0166 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0167 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0168 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0169 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0170 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0171 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0172 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0173 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0174 0175 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]); 0176 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); 0177 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); 0178 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]); 0179 } 0180 0181 0182 int 0183 get_NUBasis_d2funcs_s (NUBasis* restrict basis, double x, 0184 float bfuncs[4], float dbfuncs[4], float d2bfuncs[4]) 0185 { 0186 double b1[2], b2[3]; 0187 int i = (*basis->grid->reverse_map)(basis->grid, x); 0188 int i2 = i+2; 0189 double* restrict dxInv = basis->dxInv; 0190 double* restrict xVals = basis->xVals; 0191 0192 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0193 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0194 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0195 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0196 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0197 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0198 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0199 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0200 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0201 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0202 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0203 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0204 0205 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]); 0206 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); 0207 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); 0208 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]); 0209 0210 d2bfuncs[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); 0211 d2bfuncs[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + 0212 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); 0213 d2bfuncs[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - 0214 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); 0215 d2bfuncs[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); 0216 0217 return i; 0218 } 0219 0220 0221 void 0222 get_NUBasis_d2funcs_si (NUBasis* restrict basis, int i, 0223 float bfuncs[4], float dbfuncs[4], float d2bfuncs[4]) 0224 { 0225 double b1[2], b2[3]; 0226 double x = basis->grid->points[i]; 0227 int i2 = i+2; 0228 double* restrict dxInv = basis->dxInv; 0229 double* restrict xVals = basis->xVals; 0230 0231 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0232 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0233 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0234 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0235 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0236 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0237 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0238 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0239 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0240 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0241 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0242 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0243 0244 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]); 0245 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); 0246 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); 0247 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]); 0248 0249 d2bfuncs[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); 0250 d2bfuncs[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + 0251 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); 0252 d2bfuncs[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - 0253 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); 0254 d2bfuncs[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); 0255 } 0256 0257 0258 ////////////////////////////// 0259 // Double-precision version // 0260 ////////////////////////////// 0261 int 0262 get_NUBasis_funcs_d (NUBasis* restrict basis, double x, 0263 double bfuncs[4]) 0264 { 0265 double b1[2], b2[3]; 0266 int i = (*basis->grid->reverse_map)(basis->grid, x); 0267 int i2 = i+2; 0268 double* restrict dxInv = basis->dxInv; 0269 double* restrict xVals = basis->xVals; 0270 0271 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0272 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0273 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0274 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0275 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0276 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0277 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0278 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0279 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0280 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0281 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0282 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0283 return i; 0284 } 0285 0286 0287 void 0288 get_NUBasis_funcs_di (NUBasis* restrict basis, int i, 0289 double bfuncs[4]) 0290 { 0291 int i2 = i+2; 0292 double b1[2], b2[3]; 0293 double x = basis->grid->points[i]; 0294 double* restrict dxInv = basis->dxInv; 0295 double* restrict xVals = basis->xVals; 0296 0297 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0298 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0299 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0300 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0301 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0302 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0303 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0304 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0305 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0306 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0307 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0308 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0309 } 0310 0311 int 0312 get_NUBasis_dfuncs_d (NUBasis* restrict basis, double x, 0313 double bfuncs[4], double dbfuncs[4]) 0314 { 0315 double b1[2], b2[3]; 0316 int i = (*basis->grid->reverse_map)(basis->grid, x); 0317 int i2 = i+2; 0318 double* restrict dxInv = basis->dxInv; 0319 double* restrict xVals = basis->xVals; 0320 0321 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0322 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0323 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0324 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0325 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0326 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0327 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0328 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0329 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0330 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0331 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0332 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0333 0334 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]); 0335 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); 0336 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); 0337 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]); 0338 0339 return i; 0340 } 0341 0342 0343 void 0344 get_NUBasis_dfuncs_di (NUBasis* restrict basis, int i, 0345 double bfuncs[4], double dbfuncs[4]) 0346 { 0347 double b1[2], b2[3]; 0348 double x = basis->grid->points[i]; 0349 int i2 = i+2; 0350 double* restrict dxInv = basis->dxInv; 0351 double* restrict xVals = basis->xVals; 0352 0353 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0354 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0355 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0356 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0357 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0358 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0359 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0360 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0361 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0362 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0363 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0364 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0365 0366 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]); 0367 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); 0368 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); 0369 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]); 0370 } 0371 0372 0373 int 0374 get_NUBasis_d2funcs_d (NUBasis* restrict basis, double x, 0375 double bfuncs[4], double dbfuncs[4], double d2bfuncs[4]) 0376 { 0377 double b1[2], b2[3]; 0378 int i = (*basis->grid->reverse_map)(basis->grid, x); 0379 int i2 = i+2; 0380 double* restrict dxInv = basis->dxInv; 0381 double* restrict xVals = basis->xVals; 0382 0383 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0384 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0385 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0386 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0387 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0388 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0389 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0390 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0391 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0392 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0393 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0394 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0395 0396 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]); 0397 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); 0398 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); 0399 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]); 0400 0401 d2bfuncs[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); 0402 d2bfuncs[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + 0403 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); 0404 d2bfuncs[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - 0405 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); 0406 d2bfuncs[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); 0407 0408 return i; 0409 } 0410 0411 0412 void 0413 get_NUBasis_d2funcs_di (NUBasis* restrict basis, int i, 0414 double bfuncs[4], double dbfuncs[4], 0415 double d2bfuncs[4]) 0416 { 0417 double b1[2], b2[3]; 0418 double x = basis->grid->points[i]; 0419 int i2 = i+2; 0420 double* restrict dxInv = basis->dxInv; 0421 double* restrict xVals = basis->xVals; 0422 0423 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0424 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0425 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0426 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0427 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0428 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0429 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0430 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0431 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0432 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0433 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0434 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0435 0436 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]); 0437 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); 0438 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); 0439 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]); 0440 0441 d2bfuncs[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); 0442 d2bfuncs[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + 0443 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); 0444 d2bfuncs[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - 0445 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); 0446 d2bfuncs[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); 0447 } 0448 0449 0450 #ifdef HAVE_SSE2 0451 typedef union 0452 { 0453 float s[4]; 0454 __m128 v; 0455 } uvec4; 0456 0457 typedef union 0458 { 0459 double s[2]; 0460 __m128d v; 0461 } uvec2; 0462 0463 int 0464 get_NUBasis_funcs_sse_s (NUBasis* restrict basis, double x, 0465 __m128 *restrict funcs) 0466 { 0467 double b1[2], b2[3]; 0468 int i = (*basis->grid->reverse_map)(basis->grid, x); 0469 int i2 = i+2; 0470 double* restrict dxInv = basis->dxInv; 0471 double* restrict xVals = basis->xVals; 0472 0473 uvec4 bfuncs; 0474 0475 0476 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0477 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0478 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0479 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0480 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0481 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0482 bfuncs.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0483 bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0484 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0485 bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0486 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0487 bfuncs.s[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0488 *funcs = bfuncs.v; 0489 return i; 0490 } 0491 0492 int 0493 get_NUBasis_dfuncs_sse_s (NUBasis* restrict basis, double x, 0494 __m128 *restrict funcs, __m128 *restrict dfuncs) 0495 { 0496 double b1[2], b2[3]; 0497 int i = (*basis->grid->reverse_map)(basis->grid, x); 0498 int i2 = i+2; 0499 double* restrict dxInv = basis->dxInv; 0500 double* restrict xVals = basis->xVals; 0501 uvec4 bfuncs, dbfuncs; 0502 0503 0504 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0505 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0506 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0507 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0508 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0509 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0510 bfuncs.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0511 bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0512 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0513 bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0514 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0515 bfuncs.s[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0516 0517 dbfuncs.s[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]); 0518 dbfuncs.s[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); 0519 dbfuncs.s[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); 0520 dbfuncs.s[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]); 0521 0522 *funcs = bfuncs.v; 0523 *dfuncs = dbfuncs.v; 0524 0525 return i; 0526 } 0527 0528 int 0529 get_NUBasis_d2funcs_sse_s (NUBasis* restrict basis, double x, 0530 __m128 *restrict funcs, __m128 *restrict dfuncs, __m128 *restrict d2funcs) 0531 { 0532 double b1[2], b2[3]; 0533 int i = (*basis->grid->reverse_map)(basis->grid, x); 0534 int i2 = i+2; 0535 double* restrict dxInv = basis->dxInv; 0536 double* restrict xVals = basis->xVals; 0537 uvec4 bfuncs, dbfuncs, d2bfuncs; 0538 0539 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0540 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0541 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0542 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0543 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0544 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0545 bfuncs.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0546 bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0547 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0548 bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0549 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0550 bfuncs.s[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0551 0552 dbfuncs.s[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]); 0553 dbfuncs.s[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); 0554 dbfuncs.s[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); 0555 dbfuncs.s[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]); 0556 0557 d2bfuncs.s[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); 0558 d2bfuncs.s[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + 0559 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); 0560 d2bfuncs.s[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - 0561 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); 0562 d2bfuncs.s[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); 0563 0564 *funcs = bfuncs.v; 0565 *dfuncs = dbfuncs.v; 0566 *d2funcs = d2bfuncs.v; 0567 0568 return i; 0569 } 0570 0571 0572 ////////////////////////////// 0573 // Double-precision version // 0574 ////////////////////////////// 0575 int 0576 get_NUBasis_funcs_sse_d (NUBasis* restrict basis, double x, 0577 __m128d *restrict f01, __m128d *restrict f23) 0578 { 0579 double b1[2], b2[3]; 0580 int i = (*basis->grid->reverse_map)(basis->grid, x); 0581 int i2 = i+2; 0582 double* restrict dxInv = basis->dxInv; 0583 double* restrict xVals = basis->xVals; 0584 uvec2 bf01, bf23, dbf01, dbf23; 0585 0586 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0587 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0588 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0589 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0590 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0591 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0592 bf01.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0593 bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0594 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0595 bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0596 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0597 bf23.s[1] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0598 0599 *f01 = bf01.v; *f23 = bf23.v; 0600 return i; 0601 } 0602 0603 int 0604 get_NUBasis_dfuncs_sse_d (NUBasis* restrict basis, double x, 0605 __m128d *restrict f01, __m128d *restrict f23, 0606 __m128d *restrict df01, __m128d *restrict df23) 0607 0608 { 0609 double b1[2], b2[3]; 0610 int i = (*basis->grid->reverse_map)(basis->grid, x); 0611 int i2 = i+2; 0612 double* restrict dxInv = basis->dxInv; 0613 double* restrict xVals = basis->xVals; 0614 uvec2 bf01, bf23, dbf01, dbf23; 0615 0616 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0617 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0618 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0619 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0620 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0621 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0622 bf01.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0623 bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0624 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0625 bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0626 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0627 bf23.s[1] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0628 0629 dbf01.s[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]); 0630 dbf01.s[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); 0631 dbf23.s[0] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); 0632 dbf23.s[1] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]); 0633 0634 *f01 = bf01.v; *f23 = bf23.v; 0635 *df01 = dbf01.v; *df23 = dbf23.v; 0636 0637 return i; 0638 } 0639 0640 int 0641 get_NUBasis_d2funcs_sse_d (NUBasis* restrict basis, double x, 0642 __m128d *restrict f01, __m128d *restrict f23, 0643 __m128d *restrict df01, __m128d *restrict df23, 0644 __m128d *restrict d2f01, __m128d *restrict d2f23) 0645 { 0646 double b1[2], b2[3]; 0647 int i = (*basis->grid->reverse_map)(basis->grid, x); 0648 int i2 = i+2; 0649 double* restrict dxInv = basis->dxInv; 0650 double* restrict xVals = basis->xVals; 0651 uvec2 bf01, bf23, dbf01, dbf23, d2bf01, d2bf23; 0652 0653 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; 0654 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; 0655 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; 0656 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ 0657 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); 0658 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; 0659 bf01.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 0660 bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + 0661 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); 0662 bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + 0663 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); 0664 bf23.s[1] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; 0665 0666 dbf01.s[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]); 0667 dbf01.s[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); 0668 dbf23.s[0] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); 0669 dbf23.s[1] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]); 0670 0671 d2bf01.s[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); 0672 d2bf01.s[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + 0673 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); 0674 d2bf23.s[0] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - 0675 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); 0676 d2bf23.s[1] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); 0677 0678 *f01 = bf01.v; *f23 = bf23.v; 0679 *df01 = dbf01.v; *df23 = dbf23.v; 0680 *d2f01 = d2bf01.v; *d2f23 = d2bf23.v; 0681 0682 return i; 0683 } 0684 0685 #endif