File indexing completed on 2024-05-12 03:47:52
0001 /* 0002 File : nsl_kde.c 0003 Project : LabPlot 0004 Description : NSL functions for the kernel density estimation 0005 -------------------------------------------------------------------- 0006 SPDX-FileCopyrightText: 2021-2023 Alexander Semke <alexander.semke@web.de> 0007 SPDX-License-Identifier: GPL-2.0-or-later 0008 */ 0009 0010 #include "nsl_kde.h" 0011 0012 #include <gsl/gsl_math.h> 0013 #include <gsl/gsl_randist.h> 0014 #include <gsl/gsl_sort.h> 0015 #include <gsl/gsl_statistics.h> 0016 0017 double nsl_kde(const double* data, double x, nsl_kernel_type kernel, double h, size_t n) { 0018 double density = 0; 0019 switch (kernel) { 0020 case nsl_kernel_uniform: 0021 for (size_t i = 0; i < n; i++) 0022 density += nsl_sf_kernel_uniform((data[i] - x) / h); 0023 break; 0024 case nsl_kernel_triangular: 0025 for (size_t i = 0; i < n; i++) 0026 density += nsl_sf_kernel_triangular((data[i] - x) / h); 0027 break; 0028 case nsl_kernel_parabolic: 0029 for (size_t i = 0; i < n; i++) 0030 density += nsl_sf_kernel_parabolic((data[i] - x) / h); 0031 break; 0032 case nsl_kernel_quartic: 0033 for (size_t i = 0; i < n; i++) 0034 density += nsl_sf_kernel_quartic((data[i] - x) / h); 0035 break; 0036 case nsl_kernel_triweight: 0037 for (size_t i = 0; i < n; i++) 0038 density += nsl_sf_kernel_triweight((data[i] - x) / h); 0039 break; 0040 case nsl_kernel_tricube: 0041 for (size_t i = 0; i < n; i++) 0042 density += nsl_sf_kernel_tricube((data[i] - x) / h); 0043 break; 0044 case nsl_kernel_cosine: 0045 for (size_t i = 0; i < n; i++) 0046 density += nsl_sf_kernel_cosine((data[i] - x) / h); 0047 break; 0048 case nsl_kernel_gauss: 0049 for (size_t i = 0; i < n; i++) 0050 density += nsl_sf_kernel_gaussian((data[i] - x) / h); 0051 } 0052 0053 return density / (n * h); 0054 } 0055 0056 double nsl_kde_bandwidth(int n, double sigma, double iqr, nsl_kde_bandwidth_type type) { 0057 switch (type) { 0058 case nsl_kde_bandwidth_silverman: 0059 return 0.9 * GSL_MIN(sigma, iqr / 1.34) * pow(n, -0.2); 0060 case nsl_kde_bandwidth_scott: 0061 return 1.059 * sigma * pow(n, -0.2); 0062 case nsl_kde_bandwidth_custom: 0063 break; 0064 } 0065 0066 // return a small value to avoid division by zero by the caller, 0067 // should actually never happen since it's only relevant for 0068 // the case nsl_kde_bandwidth_custom which has to be handled by the caller anyway. 0069 return 1e-6; 0070 } 0071 0072 double nsl_kde_bandwidth_from_data(double* data, int n, nsl_kde_bandwidth_type type) { 0073 switch (type) { 0074 case nsl_kde_bandwidth_silverman: 0075 return nsl_kde_silverman_bandwidth(data, n); 0076 case nsl_kde_bandwidth_scott: 0077 return nsl_kde_scott_bandwidth(data, n); 0078 case nsl_kde_bandwidth_custom: 0079 break; 0080 } 0081 0082 // return a small value to avoid division by zero by the caller, 0083 // should actually never happen since it's only relevant for 0084 // the case nsl_kde_bandwidth_custom which has to be handled by the caller anyway. 0085 return 1e-6; 0086 } 0087 0088 double nsl_kde_silverman_bandwidth(double* data, int n) { 0089 gsl_sort(data, 1, n); 0090 const double sigma = gsl_stats_sd(data, 1, n); 0091 const double iqr = gsl_stats_quantile_from_sorted_data(data, 1, n, 0.75) - gsl_stats_quantile_from_sorted_data(data, 1, n, 0.25); 0092 0093 return 0.9 * GSL_MIN(sigma, iqr / 1.34) * pow(n, -0.2); 0094 } 0095 0096 double nsl_kde_scott_bandwidth(double* data, int n) { 0097 // gsl_sort(data, 1, n); 0098 const double sigma = gsl_stats_sd(data, 1, n); 0099 0100 return 1.059 * sigma * pow(n, 0.2); 0101 }