File indexing completed on 2024-05-12 03:47:54
0001 /* 0002 File : nsl_stats.c 0003 Project : LabPlot 0004 Description : NSL statistics functions 0005 -------------------------------------------------------------------- 0006 SPDX-FileCopyrightText: 2016-2017 Stefan Gerlach <stefan.gerlach@uni.kn> 0007 SPDX-License-Identifier: GPL-2.0-or-later 0008 */ 0009 0010 #include "nsl_stats.h" 0011 #include <float.h> 0012 #include <gsl/gsl_cdf.h> 0013 #include <gsl/gsl_sort.h> 0014 #include <math.h> 0015 0016 double nsl_stats_minimum(const double data[], const size_t n, size_t* index) { 0017 size_t i; 0018 0019 double min = data[0]; 0020 if (index != NULL) 0021 *index = 0; 0022 0023 for (i = 1; i < n; i++) { 0024 if (data[i] < min) { 0025 min = data[i]; 0026 if (index != NULL) 0027 *index = i; 0028 } 0029 } 0030 0031 return min; 0032 } 0033 0034 double nsl_stats_maximum(const double data[], const size_t n, size_t* index) { 0035 size_t i; 0036 0037 double max = data[0]; 0038 if (index != NULL) 0039 *index = 0; 0040 0041 for (i = 1; i < n; i++) { 0042 if (data[i] > max) { 0043 max = data[i]; 0044 if (index != NULL) 0045 *index = i; 0046 } 0047 } 0048 0049 return max; 0050 } 0051 0052 double nsl_stats_median(double data[], size_t stride, size_t n, nsl_stats_quantile_type type) { 0053 gsl_sort(data, stride, n); 0054 return nsl_stats_median_sorted(data, stride, n, type); 0055 } 0056 0057 double nsl_stats_median_sorted(const double sorted_data[], size_t stride, size_t n, nsl_stats_quantile_type type) { 0058 return nsl_stats_quantile_sorted(sorted_data, stride, n, 0.5, type); 0059 } 0060 0061 double nsl_stats_median_from_sorted_data(const double sorted_data[], size_t stride, size_t n) { 0062 // default method is number 7 0063 return nsl_stats_median_sorted(sorted_data, stride, n, nsl_stats_quantile_type7); 0064 } 0065 0066 double nsl_stats_quantile(double data[], size_t stride, size_t n, double p, nsl_stats_quantile_type type) { 0067 gsl_sort(data, stride, n); 0068 return nsl_stats_quantile_sorted(data, stride, n, p, type); 0069 } 0070 0071 double nsl_stats_quantile_sorted(const double d[], size_t stride, size_t n, double p, nsl_stats_quantile_type type) { 0072 switch (type) { 0073 case nsl_stats_quantile_type1: // h = Np + 1/2, x[ceil(h – 1/2)] 0074 if (p == 0.0) 0075 return d[0]; 0076 else 0077 return d[((int)ceil(n * p) - 1) * stride]; 0078 case nsl_stats_quantile_type2: 0079 if (p == 0.0) 0080 return d[0]; 0081 else if (p == 1.0) 0082 return d[(n - 1) * stride]; 0083 else 0084 return (d[((int)ceil(n * p) - 1) * stride] + d[((int)ceil(n * p + 1) - 1) * stride]) / 2.; 0085 case nsl_stats_quantile_type3: 0086 if (p <= 0.5 / n) 0087 return d[0]; 0088 else 0089 #ifdef _WIN32 0090 return d[((int)round(n * p) - 1) * stride]; 0091 #else 0092 return d[(lrint(n * p) - 1) * stride]; 0093 #endif 0094 case nsl_stats_quantile_type4: 0095 if (p < 1. / n) 0096 return d[0]; 0097 else if (p == 1.0) 0098 return d[(n - 1) * stride]; 0099 else { 0100 int i = (int)floor(n * p); 0101 return d[(i - 1) * stride] + (n * p - i) * (d[i * stride] - d[(i - 1) * stride]); 0102 } 0103 case nsl_stats_quantile_type5: 0104 if (p < 0.5 / n) 0105 return d[0]; 0106 else if (p >= (n - 0.5) / n) 0107 return d[(n - 1) * stride]; 0108 else { 0109 int i = (int)floor(n * p + 0.5); 0110 return d[(i - 1) * stride] + (n * p + 0.5 - i) * (d[i * stride] - d[(i - 1) * stride]); 0111 } 0112 case nsl_stats_quantile_type6: 0113 if (p < 1. / (n + 1.)) 0114 return d[0]; 0115 else if (p > n / (n + 1.)) 0116 return d[(n - 1) * stride]; 0117 else { 0118 int i = (int)floor((n + 1) * p); 0119 return d[(i - 1) * stride] + ((n + 1) * p - i) * (d[i * stride] - d[(i - 1) * stride]); 0120 } 0121 case nsl_stats_quantile_type7: // = gsl_stats_quantile_from_sorted_data(d, stride, n, p); 0122 if (p == 1.0 || n == 1) 0123 return d[(n - 1) * stride]; 0124 else { 0125 int i = (int)floor((n - 1) * p + 1); 0126 return d[(i - 1) * stride] + ((n - 1) * p + 1 - i) * (d[i * stride] - d[(i - 1) * stride]); 0127 } 0128 case nsl_stats_quantile_type8: 0129 if (p < 2. / 3. / (n + 1. / 3.)) 0130 return d[0]; 0131 else if (p >= (n - 1. / 3.) / (n + 1. / 3.)) 0132 return d[(n - 1) * stride]; 0133 else { 0134 int i = (int)floor((n + 1. / 3.) * p + 1. / 3.); 0135 return d[(i - 1) * stride] + ((n + 1. / 3.) * p + 1. / 3. - i) * (d[i * stride] - d[(i - 1) * stride]); 0136 } 0137 case nsl_stats_quantile_type9: 0138 if (p < 5. / 8. / (n + 1. / 4.)) 0139 return d[0]; 0140 else if (p >= (n - 3. / 8.) / (n + 1. / 4.)) 0141 return d[(n - 1) * stride]; 0142 else { 0143 int i = (int)floor((n + 1. / 4.) * p + 3. / 8.); 0144 return d[(i - 1) * stride] + ((n + 1. / 4.) * p + 3. / 8. - i) * (d[i * stride] - d[(i - 1) * stride]); 0145 } 0146 } 0147 0148 return 0; 0149 } 0150 0151 double nsl_stats_quantile_from_sorted_data(const double sorted_data[], size_t stride, size_t n, double p) { 0152 return nsl_stats_quantile_sorted(sorted_data, stride, n, p, nsl_stats_quantile_type7); 0153 } 0154 0155 /* R^2 and adj. R^2 */ 0156 double nsl_stats_rsquare(double sse, double sst) { 0157 return 1. - sse / sst; 0158 } 0159 double nsl_stats_rsquareAdj(double rsquare, size_t np, size_t dof, int version) { 0160 size_t n = np + dof; 0161 // see https://stats.stackexchange.com/questions/48703/what-is-the-adjusted-r-squared-formula-in-lm-in-r-and-how-should-it-be-interpret 0162 switch (version) { 0163 case 2: 0164 return 1. - (1. - rsquare) * (n - 1.) / dof; 0165 default: 0166 return 1. - (1. - rsquare) * (n - 1.) / (dof - 1.); 0167 } 0168 } 0169 0170 /* t distribution */ 0171 double nsl_stats_tdist_t(double parameter, double error) { 0172 if (error > 0) 0173 return parameter / error; 0174 else 0175 return DBL_MAX; 0176 } 0177 0178 double nsl_stats_tdist_p(double t, double dof) { 0179 double p = 2. * gsl_cdf_tdist_Q(fabs(t), dof); 0180 if (p < 1.e-9) 0181 p = 0; 0182 return p; 0183 } 0184 double nsl_stats_tdist_z(double alpha, double dof) { 0185 return gsl_cdf_tdist_Pinv(1. - alpha / 2., dof); 0186 } 0187 double nsl_stats_tdist_margin(double alpha, double dof, double error) { 0188 return nsl_stats_tdist_z(alpha, dof) * error; 0189 } 0190 0191 /* chi^2 distribution */ 0192 double nsl_stats_chisq_p(double t, double dof) { 0193 double p = gsl_cdf_chisq_Q(t, dof); 0194 if (p < 1.e-9) 0195 p = 0; 0196 return p; 0197 } 0198 double nsl_stats_chisq_low(double alpha, double n) { 0199 return 0.5 * gsl_cdf_chisq_Pinv(alpha / 2., 2 * n); 0200 } 0201 double nsl_stats_chisq_high(double alpha, double n) { 0202 return 0.5 * gsl_cdf_chisq_Pinv(1. - alpha / 2., 2 * n + 2); 0203 } 0204 0205 /* F distribution */ 0206 double nsl_stats_fdist_F(double rsquare, size_t np, size_t dof) { 0207 // (sst/sse - 1.) * dof/(p-1) = dof/(p-1)/(1./R^2 - 1) 0208 if (np < 2) 0209 np = 2; 0210 return dof / (np - 1.) / (1. / rsquare - 1.); 0211 } 0212 double nsl_stats_fdist_p(double F, size_t np, double dof) { 0213 double p = gsl_cdf_fdist_Q(F, (double)np, dof); 0214 if (p < 1.e-9) 0215 p = 0; 0216 return p; 0217 } 0218 0219 /* log-likelihood */ 0220 double nsl_stats_logLik(double sse, size_t n) { 0221 double ll = -(double)n / 2. * log(sse / n) - n / 2. * log(2 * M_PI) - n / 2.; 0222 return ll; 0223 } 0224 0225 /* Akaike information criterion */ 0226 double nsl_stats_aic(double sse, size_t n, size_t np, int version) { 0227 switch (version) { 0228 case 2: 0229 return n * log(sse / n) + 2. * np; // reduced formula 0230 case 3: { 0231 double aic = n * log(sse / n) + 2. * np; 0232 if (n < 40 * np) // bias correction 0233 aic += 2. * np * (np + 1.) / (n - np - 1.); 0234 return aic; 0235 } 0236 default: 0237 return n * log(sse / n) + 2. * (np + 1) + n * log(2. * M_PI) + n; // complete formula used in R 0238 } 0239 } 0240 double nsl_stats_aicc(double sse, size_t n, size_t np, int version) { 0241 double aic; 0242 switch (version) { 0243 case 2: 0244 aic = n * log(sse / n) + 2. * np; 0245 break; 0246 default: 0247 aic = n * log(sse / n) + 2. * (np + 1) + n * log(2. * M_PI) + n; 0248 } 0249 return aic + 2. * np * (np + 1.) / (n - np - 1.); 0250 } 0251 0252 /* Bayasian information criterion */ 0253 double nsl_stats_bic(double sse, size_t n, size_t np, int version) { 0254 switch (version) { 0255 case 2: 0256 return n * log(sse / n) + np * log((double)n); // reduced formula 0257 default: 0258 return n * log(sse / n) + (np + 1) * log((double)n) + n + n * log(2. * M_PI); // complete formula used in R 0259 } 0260 }