File indexing completed on 2024-05-12 05:55:12
0001 /* floaterf.c: normal distribution integrals erf and the like */ 0002 /* 0003 Copyright (C) 2007 - 2009 Wolf Lammen. 0004 0005 This program is free software; you can redistribute it and/or modify 0006 it under the terms of the GNU General Public License as published by 0007 the Free Software Foundation; either version 2 of the License , or 0008 (at your option) any later version. 0009 0010 This program is distributed in the hope that it will be useful, 0011 but WITHOUT ANY WARRANTY; without even the implied warranty of 0012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 0013 GNU General Public License for more details. 0014 0015 You should have received a copy of the GNU General Public License 0016 along with this program; see the file COPYING. If not, write to: 0017 0018 The Free Software Foundation, Inc. 0019 59 Temple Place, Suite 330 0020 Boston, MA 02111-1307 USA. 0021 0022 0023 You may contact the author by: 0024 e-mail: ookami1 <at> gmx <dot> de 0025 mail: Wolf Lammen 0026 Oertzweg 45 0027 22307 Hamburg 0028 Germany 0029 0030 *************************************************************************/ 0031 0032 #include "floaterf.h" 0033 #include "floatconst.h" 0034 #include "floatcommon.h" 0035 #include "floatexp.h" 0036 #include "math.h" 0037 0038 /* 0039 The Taylor expansion of sqrt(pi)*erf(x)/2 around x = 0. 0040 converges only for small |x| < 1 sufficiently. 0041 erf(x) = SUM[i>=0] (x^(2*i+1)/(i! * (2*i+1))) 0042 0043 relative error for 100-digit evaluation: < 3e-100 0044 */ 0045 0046 char 0047 erfseries(floatnum x, 0048 int digits) 0049 { 0050 floatstruct xsqr, smd, pwr; 0051 int i, workprec, expx; 0052 0053 expx = float_getexponent(x); 0054 workprec = digits + 2*expx + 2; 0055 if (workprec <= 0 || float_iszero(x)) 0056 /* for tiny arguments approx. == x */ 0057 return 1; 0058 float_create(&xsqr); 0059 float_create(&smd); 0060 float_create(&pwr); 0061 float_mul(&xsqr, x, x, workprec + 1); 0062 workprec = digits + float_getexponent(&xsqr) + 1; 0063 float_copy(&pwr, x, workprec + 1); 0064 i = 1; 0065 while (workprec > 0) 0066 { 0067 float_mul(&pwr, &pwr, &xsqr, workprec + 1); 0068 float_divi(&pwr, &pwr, -i, workprec + 1); 0069 float_divi(&smd, &pwr, 2 * i++ + 1, workprec); 0070 float_add(x, x, &smd, digits + 3); 0071 workprec = digits + float_getexponent(&smd) + expx + 2; 0072 } 0073 float_free(&pwr); 0074 float_free(&smd); 0075 float_free(&xsqr); 0076 return 1; 0077 } 0078 0079 /* the asymptotic expansion of erfc, the bigger x is, the better. 0080 returns sum( (2*i+1)! /i! / x^(2*i) 0081 Relative error for x >= 16 and 100-digit evaluation is less than 0082 9e-100 */ 0083 0084 char 0085 erfcasymptotic(floatnum x, 0086 int digits) 0087 { 0088 floatstruct smd, fct; 0089 int i, workprec, newprec; 0090 0091 float_create(&smd); 0092 float_create(&fct); 0093 workprec = digits - 2 * float_getexponent(x) + 1; 0094 if (workprec <= 0) 0095 { 0096 float_copy(x, &c1, EXACT); 0097 return 1; 0098 } 0099 float_mul(&fct, x, x, digits + 1); 0100 float_div(&fct, &c1Div2, &fct, digits); 0101 float_neg(&fct); 0102 float_copy(&smd, &c1, EXACT); 0103 float_setzero(x); 0104 newprec = digits; 0105 workprec = newprec; 0106 i = 1; 0107 while (newprec > 0 && newprec <= workprec) 0108 { 0109 workprec = newprec; 0110 float_add(x, x, &smd, digits + 4); 0111 float_muli(&smd, &smd, i, workprec + 1); 0112 float_mul(&smd, &smd, &fct, workprec + 2); 0113 newprec = digits + float_getexponent(&smd) + 1; 0114 i += 2; 0115 } 0116 float_free(&fct); 0117 float_free(&smd); 0118 return newprec <= workprec; 0119 } 0120 0121 /* this algorithm is based on a paper from Crandall, who in turn attributes 0122 to Chiarella and Reichel. 0123 Found this in a paper from Borwein, Bailey and Girgensohn, and added 0124 minor improvements such as the adaptive working precision. 0125 There is a restriction with this algorithm not mentioned in the paper: 0126 x must not be too large, because the correcting term 2/(1-exp(2*pi*x/alpha)) 0127 becomes dominant and renders the result incorrect for large x. Fortunately, 0128 the valid range seems to overlap with the range of the asymptotic formula. 0129 0130 Picks a fixed alpha suitable for the desired precision and evaluates the sum 0131 f(t, alpha) = Sum[k>0](exp(-k*k*alpha*alpha)/(k*k*alpha*alpha + t) 0132 f(t, alpha) is used in the evaluation of erfc(sqrt(t)) 0133 0134 alpha is dependent on the desired precision; For a precision of p 0135 places, alpha should be < pi/sqrt(p*ln 10). Unfortunately, the 0136 smaller alpha is, the worse is the convergence rate, so alpha is 0137 usually approximately its upper limit. 0138 0139 relative error for 100-digit evaluation < 5e-100 */ 0140 0141 char 0142 erfcsum(floatnum x, /* should be the square of the parameter to erfc */ 0143 int digits) 0144 { 0145 int i; 0146 int workprec = 0; 0147 floatstruct sum, smd; 0148 floatnum Ei; 0149 0150 if (digits > erfcdigits) 0151 { 0152 /* cannot re-use last evaluation's intermediate results */ 0153 for (i = MAXERFCIDX; --i >= 0;) 0154 /* clear all exp(-k*k*alpha*alpha) to indicate their absence */ 0155 float_free(&erfccoeff[i]); 0156 /* current precision */ 0157 erfcdigits = digits; 0158 /* create new alpha appropriate for the desired precision 0159 This alpha need not be high precision, any alpha near the 0160 one evaluated here would do */ 0161 float_setfloat(&erfcalpha, M_PI / aprxsqrt((digits + 4) * M_LN10)); 0162 float_round(&erfcalpha, &erfcalpha, 3, TONEAREST); 0163 0164 float_mul(&erfcalphasqr, &erfcalpha, &erfcalpha, EXACT); 0165 /* the exp(-k*k*alpha*alpha) are later evaluated iteratively. 0166 Initiate the iteration here */ 0167 float_copy(&erfct2, &erfcalphasqr, EXACT); 0168 float_neg(&erfct2); 0169 _exp(&erfct2, digits + 3); /* exp(-alpha*alpha) */ 0170 float_copy(erfccoeff, &erfct2, EXACT); /* start value */ 0171 float_mul(&erfct3, &erfct2, &erfct2, digits + 3); /* exp(-2*alpha*alpha) */ 0172 } 0173 float_create(&sum); 0174 float_create(&smd); 0175 float_setzero(&sum); 0176 for (i = 0; ++i < MAXERFCIDX;) 0177 { 0178 Ei = &erfccoeff[i-1]; 0179 if (float_isnan(Ei)) 0180 { 0181 /* if exp(-i*i*alpha*alpha) is not available, evaluate it from 0182 the coefficient of the last summand */ 0183 float_mul(&erfct2, &erfct2, &erfct3, workprec + 3); 0184 float_mul(Ei, &erfct2, &erfccoeff[i-2], workprec + 3); 0185 } 0186 /* Ei finally decays rapidly. save some time by adjusting the 0187 working precision */ 0188 workprec = digits + float_getexponent(Ei) + 1; 0189 if (workprec <= 0) 0190 break; 0191 /* evaluate the summand exp(-i*i*alpha*alpha)/(i*i*alpha*alpha+x) */ 0192 float_muli(&smd, &erfcalphasqr, i*i, workprec); 0193 float_add(&smd, x, &smd, workprec + 2); 0194 float_div(&smd, Ei, &smd, workprec + 1); 0195 /* add summand to the series */ 0196 float_add(&sum, &sum, &smd, digits + 3); 0197 } 0198 float_move(x, &sum); 0199 float_free(&smd); 0200 return 1; 0201 } 0202 0203 /* checks the quality of the asymptotic series for erfc. 0204 If the ratio of two subsequent summands from the series 0205 (the convergence rate) should not fall below `ratio' 0206 for a desired result precision (represented by digits), the number 0207 of summands n must not be greater than 0208 n <= (`digits'*ln 10 + 0.5 * ln 2)/(1 - ln `ratio') 0209 and the parameter x has to fullfil 0210 x >= sqrt(n/`ratio') 0211 `ratio' must be a value < 1, If you pick a value close to 0212 1, you finally have to add quite a lot of summands from the 0213 series (in low precision), that affect a few digits at the low 0214 end of the result only. On the other hand, choosing a good 0215 convergence rate pushes the validity range of the series towards 0216 larger x. 0217 Here, the convergence rate is chosen to be 0.5, meaning that the 0218 addition of a summand from the series at least halfs the magnitude 0219 of the tail of the series. 0220 The evaluation is carried out in low precision using interger 0221 arithmetic rather than floating point data. 0222 For a 100 digit result the lower boundary of the range of the 0223 asymptotic series (truncated when the convergence rate falls below 0.5) 0224 is x > approx. 16.5. 0225 The above formulas estimate the limit x slightly too small, especially 0226 when `digits' is small. So, to compensate for that, r should be at least 0227 <= 0.92 */ 0228 static char 0229 _asymptotic_good( 0230 floatnum x, 0231 int digits) 0232 { 0233 /* all constants scaled by 10000 */ 0234 /* 1/ratio */ 0235 #define RATIO 20000 0236 /* (1 - ln ratio) */ 0237 #define C_RATIO 16931 0238 /* ln 10 */ 0239 #define LN10 23026 0240 /* 0.5*ln 2 */ 0241 #define LN2DIV2 3466 0242 0243 int n, ix; 0244 0245 if (!float_isvalidexp(float_getexponent(x) + 2) 0246 || (digits == 1 && float_cmp(x, &c2) >= 0)) 0247 return 1; 0248 /* 10000 * n/ratio */ 0249 n = RATIO*((digits * LN10 + LN2DIV2) / C_RATIO); 0250 float_addexp(x, 2); 0251 ix = float_asinteger(x); 0252 float_addexp(x, -2); 0253 return ix == 0 || ix >= 0x10000 || ix * ix >= n; 0254 } 0255 0256 static int _logexpxsqr(int exp) 0257 { 0258 if (exp < 0) 0259 exp = 0; 0260 if (exp >= 0x1000) 0261 exp = 0x1000; 0262 return ((exp * exp * 73) >> 5); 0263 } 0264 0265 char 0266 _erf( 0267 floatnum x, 0268 int digits) 0269 { 0270 int workprec; 0271 signed char sign; 0272 0273 sign = float_getsign(x); 0274 float_abs(x); 0275 if (float_cmp(x, &c1Div2) > 0) 0276 { 0277 workprec = digits - _logexpxsqr(float_getexponent(x)); 0278 if (workprec < 0 || !_erfc(x, workprec)) 0279 float_setzero(x); 0280 float_sub(x, &c1, x, digits + 1); 0281 } 0282 else 0283 { 0284 erfnear0(x, digits); 0285 float_mul(x, x, &c2DivSqrtPi, digits + 2); 0286 } 0287 float_setsign(x, sign); 0288 return 1; 0289 } 0290 0291 char 0292 _erfc( 0293 floatnum x, 0294 int digits) 0295 { 0296 floatstruct tmp, t2, t3; 0297 int expx, prec; 0298 char result; 0299 0300 if (float_cmp(x, &c1Div2) <= 0) 0301 { 0302 /* use erfc(x) = 1 - erf(x) for small or negative x */ 0303 prec = digits; /* default for negative x, result is approx. 1 */ 0304 expx = float_getexponent(x); 0305 if (expx < 0) 0306 { 0307 /* |x| < 1, but not 0 */ 0308 prec = expx + digits + 2; 0309 if (prec <= 0) 0310 { 0311 float_copy(x, &c1, EXACT); 0312 return 1; 0313 } 0314 } 0315 _erf(x, prec); 0316 float_sub(x, &c1, x, digits + 1); 0317 return 1; 0318 } 0319 float_create(&tmp); 0320 if (_asymptotic_good(x, digits)) 0321 { 0322 if (float_mul(&tmp, x, x, digits + 5) 0323 && _exp(&tmp, digits + 3) 0324 && float_mul(&tmp, &tmp, x, digits + 3) 0325 && float_div(&tmp, &c1DivSqrtPi, &tmp, digits + 3)) 0326 { 0327 if (!erfcbigx(x, digits)) 0328 result = _seterror(x, EvalUnstable); 0329 else 0330 result = float_mul(x, x, &tmp, digits + 4); 0331 } 0332 else 0333 result = _seterror(x, Underflow); 0334 } 0335 else 0336 { 0337 result = 1; 0338 float_create(&t2); 0339 float_create(&t3); 0340 float_mul(&t2, x, x, digits + 2); 0341 float_copy(&tmp, &t2, EXACT); 0342 erfcsum(&tmp, digits); 0343 float_add(&tmp, &tmp, &tmp, digits + 1); 0344 float_copy(&t3, &t2, EXACT); 0345 float_reciprocal(&t2, digits + 1); 0346 float_add(&tmp, &tmp, &t2, digits + 2); 0347 float_neg(&t3); 0348 _exp(&t3, digits + 2); 0349 float_mul(&t3, &t3, &tmp, digits + 2); 0350 float_mul(&tmp, &erfcalpha, x, digits + 2); 0351 float_mul(&t3, &tmp, &t3, digits + 3); 0352 float_mul(&t3, &c1DivPi, &t3, digits + 2); 0353 /* quick estimate to find the right working precision */ 0354 float_div(&tmp, x, &erfcalpha, 4); 0355 float_mul(&tmp, &tmp, &c2Pi, 4); 0356 float_div(&tmp, &tmp, &cLn10, 4); 0357 prec = digits - float_getexponent(&t3) - float_asinteger(&tmp) + 1; 0358 /* add correction term */ 0359 if (prec > 0) 0360 { 0361 float_div(&tmp, x, &erfcalpha, prec + 3); 0362 float_mul(&tmp, &tmp, &c2Pi, prec + 4); 0363 _exp(&tmp, prec); 0364 float_sub(&tmp, &c1, &tmp, prec); 0365 float_div(&tmp, &c2, &tmp, prec); 0366 float_add(&t3, &t3, &tmp, digits + 1); 0367 } 0368 float_free(&t2); 0369 float_move(x, &t3); 0370 } 0371 float_free(&tmp); 0372 return result; 0373 }