File indexing completed on 2024-05-12 05:55:12

0001 /* floaterf.c: normal distribution integrals erf and the like */
0002 /*
0003     Copyright (C) 2007 - 2009 Wolf Lammen.
0004 
0005     This program is free software; you can redistribute it and/or modify
0006     it under the terms of the GNU General Public License as published by
0007     the Free Software Foundation; either version 2 of the License , or
0008     (at your option) any later version.
0009 
0010     This program is distributed in the hope that it will be useful,
0011     but WITHOUT ANY WARRANTY; without even the implied warranty of
0012     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0013     GNU General Public License for more details.
0014 
0015     You should have received a copy of the GNU General Public License
0016     along with this program; see the file COPYING.  If not, write to:
0017 
0018       The Free Software Foundation, Inc.
0019       59 Temple Place, Suite 330
0020       Boston, MA 02111-1307 USA.
0021 
0022 
0023     You may contact the author by:
0024        e-mail:  ookami1 <at> gmx <dot> de
0025        mail:  Wolf Lammen
0026               Oertzweg 45
0027               22307 Hamburg
0028               Germany
0029 
0030 *************************************************************************/
0031 
0032 #include "floaterf.h"
0033 #include "floatconst.h"
0034 #include "floatcommon.h"
0035 #include "floatexp.h"
0036 #include "math.h"
0037 
0038 /*
0039   The Taylor expansion of sqrt(pi)*erf(x)/2 around x = 0.
0040   converges only for small |x| < 1 sufficiently.
0041   erf(x) = SUM[i>=0] (x^(2*i+1)/(i! * (2*i+1)))
0042 
0043   relative error for 100-digit evaluation: < 3e-100
0044 */
0045 
0046 char
0047 erfseries(floatnum x,
0048           int digits)
0049 {
0050   floatstruct xsqr, smd, pwr;
0051   int i, workprec, expx;
0052 
0053   expx = float_getexponent(x);
0054   workprec = digits + 2*expx + 2;
0055   if (workprec <= 0 || float_iszero(x))
0056     /* for tiny arguments approx. == x */
0057     return 1;
0058   float_create(&xsqr);
0059   float_create(&smd);
0060   float_create(&pwr);
0061   float_mul(&xsqr, x, x, workprec + 1);
0062   workprec = digits + float_getexponent(&xsqr) + 1;
0063   float_copy(&pwr, x, workprec + 1);
0064   i = 1;
0065   while (workprec > 0)
0066   {
0067     float_mul(&pwr, &pwr, &xsqr, workprec + 1);
0068     float_divi(&pwr, &pwr, -i, workprec + 1);
0069     float_divi(&smd, &pwr, 2 * i++ + 1, workprec);
0070     float_add(x, x, &smd, digits + 3);
0071     workprec = digits + float_getexponent(&smd) + expx + 2;
0072   }
0073   float_free(&pwr);
0074   float_free(&smd);
0075   float_free(&xsqr);
0076   return 1;
0077 }
0078 
0079 /* the asymptotic expansion of erfc, the bigger x is, the better.
0080    returns sum( (2*i+1)! /i! / x^(2*i)
0081    Relative error for x >= 16 and 100-digit evaluation is less than
0082    9e-100 */
0083 
0084 char
0085 erfcasymptotic(floatnum x,
0086                int digits)
0087 {
0088   floatstruct smd, fct;
0089   int i, workprec, newprec;
0090 
0091   float_create(&smd);
0092   float_create(&fct);
0093   workprec = digits - 2 * float_getexponent(x) + 1;
0094   if (workprec <= 0)
0095   {
0096     float_copy(x, &c1, EXACT);
0097     return 1;
0098   }
0099   float_mul(&fct, x, x, digits + 1);
0100   float_div(&fct, &c1Div2, &fct, digits);
0101   float_neg(&fct);
0102   float_copy(&smd, &c1, EXACT);
0103   float_setzero(x);
0104   newprec = digits;
0105   workprec = newprec;
0106   i = 1;
0107   while (newprec > 0 && newprec <= workprec)
0108   {
0109     workprec = newprec;
0110     float_add(x, x, &smd, digits + 4);
0111     float_muli(&smd, &smd, i, workprec + 1);
0112     float_mul(&smd, &smd, &fct, workprec + 2);
0113     newprec = digits + float_getexponent(&smd) + 1;
0114     i += 2;
0115   }
0116   float_free(&fct);
0117   float_free(&smd);
0118   return newprec <= workprec;
0119 }
0120 
0121 /* this algorithm is based on a paper from Crandall, who in turn attributes
0122    to Chiarella and Reichel.
0123    Found this in a paper from Borwein, Bailey and Girgensohn, and added
0124    minor improvements such as the adaptive working precision.
0125    There is a restriction with this algorithm not mentioned in the paper:
0126    x must not be too large, because the correcting term 2/(1-exp(2*pi*x/alpha))
0127    becomes dominant and renders the result incorrect for large x. Fortunately,
0128    the valid range seems to overlap with the range of the asymptotic formula.
0129 
0130    Picks a fixed alpha suitable for the desired precision and evaluates the sum
0131    f(t, alpha) = Sum[k>0](exp(-k*k*alpha*alpha)/(k*k*alpha*alpha + t)
0132    f(t, alpha) is used in the evaluation of erfc(sqrt(t))
0133 
0134    alpha is dependent on the desired precision; For a precision of p
0135    places, alpha should be < pi/sqrt(p*ln 10). Unfortunately, the
0136    smaller alpha is, the worse is the convergence rate, so alpha is
0137    usually approximately its upper limit.
0138 
0139    relative error for 100-digit evaluation < 5e-100 */
0140 
0141 char
0142 erfcsum(floatnum x, /* should be the square of the parameter to erfc */
0143         int digits)
0144 {
0145   int i;
0146   int workprec = 0;
0147   floatstruct sum, smd;
0148   floatnum Ei;
0149 
0150   if (digits > erfcdigits)
0151   {
0152     /* cannot re-use last evaluation's intermediate results */
0153     for (i = MAXERFCIDX; --i >= 0;)
0154       /* clear all exp(-k*k*alpha*alpha) to indicate their absence */
0155       float_free(&erfccoeff[i]);
0156     /* current precision */
0157     erfcdigits = digits;
0158     /* create new alpha appropriate for the desired precision
0159     This alpha need not be high precision, any alpha near the
0160     one evaluated here would do */
0161     float_setfloat(&erfcalpha, M_PI / aprxsqrt((digits + 4) * M_LN10));
0162     float_round(&erfcalpha, &erfcalpha, 3, TONEAREST);
0163 
0164     float_mul(&erfcalphasqr, &erfcalpha, &erfcalpha, EXACT);
0165     /* the exp(-k*k*alpha*alpha) are later evaluated iteratively.
0166     Initiate the iteration here */
0167     float_copy(&erfct2, &erfcalphasqr, EXACT);
0168     float_neg(&erfct2);
0169     _exp(&erfct2, digits + 3); /* exp(-alpha*alpha) */
0170     float_copy(erfccoeff, &erfct2, EXACT); /* start value */
0171     float_mul(&erfct3, &erfct2, &erfct2, digits + 3); /* exp(-2*alpha*alpha) */
0172   }
0173   float_create(&sum);
0174   float_create(&smd);
0175   float_setzero(&sum);
0176   for (i = 0; ++i < MAXERFCIDX;)
0177   {
0178     Ei = &erfccoeff[i-1];
0179     if (float_isnan(Ei))
0180     {
0181       /* if exp(-i*i*alpha*alpha) is not available, evaluate it from
0182       the coefficient of the last summand */
0183       float_mul(&erfct2, &erfct2, &erfct3, workprec + 3);
0184       float_mul(Ei, &erfct2, &erfccoeff[i-2], workprec + 3);
0185     }
0186     /* Ei finally decays rapidly. save some time by adjusting the
0187     working precision */
0188     workprec = digits + float_getexponent(Ei) + 1;
0189     if (workprec <= 0)
0190       break;
0191     /* evaluate the summand exp(-i*i*alpha*alpha)/(i*i*alpha*alpha+x) */
0192     float_muli(&smd, &erfcalphasqr, i*i, workprec);
0193     float_add(&smd, x, &smd, workprec + 2);
0194     float_div(&smd, Ei, &smd, workprec + 1);
0195     /* add summand to the series */
0196     float_add(&sum, &sum, &smd, digits + 3);
0197   }
0198   float_move(x, &sum);
0199   float_free(&smd);
0200   return 1;
0201 }
0202 
0203 /* checks the quality of the asymptotic series for erfc.
0204    If the ratio of two subsequent summands from the series
0205    (the convergence rate) should not fall below `ratio'
0206    for a desired result precision (represented by digits), the number
0207    of summands n must not be greater than
0208     n <= (`digits'*ln 10 + 0.5 * ln 2)/(1 - ln `ratio')
0209    and the parameter x has to fullfil
0210     x >= sqrt(n/`ratio')
0211    `ratio' must be a value < 1, If you pick a value close to
0212    1, you finally have to add quite a lot of summands from the
0213    series (in low precision), that affect a few digits at the low
0214    end of the result only. On the other hand, choosing a good
0215    convergence rate pushes the validity range of the series towards
0216    larger x.
0217    Here, the convergence rate is chosen to be 0.5, meaning that the
0218    addition of a summand from the series at least halfs the magnitude
0219    of the tail of the series.
0220    The evaluation is carried out in low precision using interger
0221    arithmetic rather than floating point data.
0222    For a 100 digit result the lower boundary of the range of the
0223    asymptotic series (truncated when the convergence rate falls below 0.5)
0224    is x > approx. 16.5.
0225    The above formulas estimate the limit x slightly too small, especially
0226    when `digits' is small. So, to compensate for that, r should be at least
0227    <= 0.92 */
0228 static char
0229 _asymptotic_good(
0230   floatnum x,
0231   int digits)
0232 {
0233   /* all constants scaled by 10000 */
0234   /* 1/ratio */
0235 #define RATIO 20000
0236   /* (1 - ln ratio) */
0237 #define C_RATIO 16931
0238   /* ln 10 */
0239 #define LN10 23026
0240   /* 0.5*ln 2 */
0241 #define LN2DIV2 3466
0242 
0243   int n, ix;
0244 
0245   if (!float_isvalidexp(float_getexponent(x) + 2)
0246       || (digits == 1 && float_cmp(x, &c2) >= 0))
0247     return 1;
0248   /* 10000 * n/ratio */
0249   n = RATIO*((digits * LN10 + LN2DIV2) / C_RATIO);
0250   float_addexp(x, 2);
0251   ix = float_asinteger(x);
0252   float_addexp(x, -2);
0253   return ix == 0 || ix >= 0x10000 || ix * ix >= n;
0254 }
0255 
0256 static int _logexpxsqr(int exp)
0257 {
0258   if (exp < 0)
0259     exp = 0;
0260   if (exp >= 0x1000)
0261     exp = 0x1000;
0262   return ((exp * exp * 73) >> 5);
0263 }
0264 
0265 char
0266 _erf(
0267   floatnum x,
0268   int digits)
0269 {
0270   int workprec;
0271   signed char sign;
0272 
0273   sign = float_getsign(x);
0274   float_abs(x);
0275   if (float_cmp(x, &c1Div2) > 0)
0276   {
0277     workprec = digits - _logexpxsqr(float_getexponent(x));
0278     if (workprec < 0 || !_erfc(x, workprec))
0279       float_setzero(x);
0280     float_sub(x, &c1, x, digits + 1);
0281   }
0282   else
0283   {
0284     erfnear0(x, digits);
0285     float_mul(x, x, &c2DivSqrtPi, digits + 2);
0286   }
0287   float_setsign(x, sign);
0288   return 1;
0289 }
0290 
0291 char
0292 _erfc(
0293   floatnum x,
0294   int digits)
0295 {
0296   floatstruct tmp, t2, t3;
0297   int expx, prec;
0298   char result;
0299 
0300   if (float_cmp(x, &c1Div2) <= 0)
0301   {
0302     /* use erfc(x) = 1 - erf(x) for small or negative x */
0303     prec = digits; /* default for negative x, result is approx. 1 */
0304     expx = float_getexponent(x);
0305     if (expx < 0)
0306     {
0307       /* |x| < 1, but not 0 */
0308       prec = expx + digits + 2;
0309       if (prec <= 0)
0310       {
0311         float_copy(x, &c1, EXACT);
0312         return 1;
0313       }
0314     }
0315     _erf(x, prec);
0316     float_sub(x, &c1, x, digits + 1);
0317     return 1;
0318   }
0319   float_create(&tmp);
0320   if (_asymptotic_good(x, digits))
0321   {
0322     if (float_mul(&tmp, x, x, digits + 5)
0323         && _exp(&tmp, digits + 3)
0324         && float_mul(&tmp, &tmp, x, digits + 3)
0325         && float_div(&tmp, &c1DivSqrtPi, &tmp, digits + 3))
0326     {
0327       if (!erfcbigx(x, digits))
0328         result = _seterror(x, EvalUnstable);
0329       else
0330         result = float_mul(x, x, &tmp, digits + 4);
0331     }
0332     else
0333       result = _seterror(x, Underflow);
0334   }
0335   else
0336   {
0337     result = 1;
0338     float_create(&t2);
0339     float_create(&t3);
0340     float_mul(&t2, x, x, digits + 2);
0341     float_copy(&tmp, &t2, EXACT);
0342     erfcsum(&tmp, digits);
0343     float_add(&tmp, &tmp, &tmp, digits + 1);
0344     float_copy(&t3, &t2, EXACT);
0345     float_reciprocal(&t2, digits + 1);
0346     float_add(&tmp, &tmp, &t2, digits + 2);
0347     float_neg(&t3);
0348     _exp(&t3, digits + 2);
0349     float_mul(&t3, &t3, &tmp, digits + 2);
0350     float_mul(&tmp, &erfcalpha, x, digits + 2);
0351     float_mul(&t3, &tmp, &t3, digits + 3);
0352     float_mul(&t3, &c1DivPi, &t3, digits + 2);
0353     /* quick estimate to find the right working precision */
0354     float_div(&tmp, x, &erfcalpha, 4);
0355     float_mul(&tmp, &tmp, &c2Pi, 4);
0356     float_div(&tmp, &tmp, &cLn10, 4);
0357     prec = digits - float_getexponent(&t3) - float_asinteger(&tmp) + 1;
0358     /* add correction term */
0359     if (prec > 0)
0360     {
0361       float_div(&tmp, x, &erfcalpha, prec + 3);
0362       float_mul(&tmp, &tmp, &c2Pi, prec + 4);
0363       _exp(&tmp, prec);
0364       float_sub(&tmp, &c1, &tmp, prec);
0365       float_div(&tmp, &c2, &tmp, prec);
0366       float_add(&t3, &t3, &tmp, digits + 1);
0367     }
0368     float_free(&t2);
0369     float_move(x, &t3);
0370   }
0371   float_free(&tmp);
0372   return result;
0373 }