File indexing completed on 2024-05-12 03:47:52

0001 /*
0002     File                 : nsl_kde.c
0003     Project              : LabPlot
0004     Description          : NSL functions for the kernel density estimation
0005     --------------------------------------------------------------------
0006     SPDX-FileCopyrightText: 2021-2023 Alexander Semke <alexander.semke@web.de>
0007     SPDX-License-Identifier: GPL-2.0-or-later
0008 */
0009 
0010 #include "nsl_kde.h"
0011 
0012 #include <gsl/gsl_math.h>
0013 #include <gsl/gsl_randist.h>
0014 #include <gsl/gsl_sort.h>
0015 #include <gsl/gsl_statistics.h>
0016 
0017 double nsl_kde(const double* data, double x, nsl_kernel_type kernel, double h, size_t n) {
0018     double density = 0;
0019     switch (kernel) {
0020     case nsl_kernel_uniform:
0021         for (size_t i = 0; i < n; i++)
0022             density += nsl_sf_kernel_uniform((data[i] - x) / h);
0023         break;
0024     case nsl_kernel_triangular:
0025         for (size_t i = 0; i < n; i++)
0026             density += nsl_sf_kernel_triangular((data[i] - x) / h);
0027         break;
0028     case nsl_kernel_parabolic:
0029         for (size_t i = 0; i < n; i++)
0030             density += nsl_sf_kernel_parabolic((data[i] - x) / h);
0031         break;
0032     case nsl_kernel_quartic:
0033         for (size_t i = 0; i < n; i++)
0034             density += nsl_sf_kernel_quartic((data[i] - x) / h);
0035         break;
0036     case nsl_kernel_triweight:
0037         for (size_t i = 0; i < n; i++)
0038             density += nsl_sf_kernel_triweight((data[i] - x) / h);
0039         break;
0040     case nsl_kernel_tricube:
0041         for (size_t i = 0; i < n; i++)
0042             density += nsl_sf_kernel_tricube((data[i] - x) / h);
0043         break;
0044     case nsl_kernel_cosine:
0045         for (size_t i = 0; i < n; i++)
0046             density += nsl_sf_kernel_cosine((data[i] - x) / h);
0047         break;
0048     case nsl_kernel_gauss:
0049         for (size_t i = 0; i < n; i++)
0050             density += nsl_sf_kernel_gaussian((data[i] - x) / h);
0051     }
0052 
0053     return density / (n * h);
0054 }
0055 
0056 double nsl_kde_bandwidth(int n, double sigma, double iqr, nsl_kde_bandwidth_type type) {
0057     switch (type) {
0058     case nsl_kde_bandwidth_silverman:
0059         return 0.9 * GSL_MIN(sigma, iqr / 1.34) * pow(n, -0.2);
0060     case nsl_kde_bandwidth_scott:
0061         return 1.059 * sigma * pow(n, -0.2);
0062     case nsl_kde_bandwidth_custom:
0063         break;
0064     }
0065 
0066     // return a small value to avoid division by zero by the caller,
0067     // should actually never happen since it's only relevant for
0068     // the case nsl_kde_bandwidth_custom which has to be handled by the caller anyway.
0069     return 1e-6;
0070 }
0071 
0072 double nsl_kde_bandwidth_from_data(double* data, int n, nsl_kde_bandwidth_type type) {
0073     switch (type) {
0074     case nsl_kde_bandwidth_silverman:
0075         return nsl_kde_silverman_bandwidth(data, n);
0076     case nsl_kde_bandwidth_scott:
0077         return nsl_kde_scott_bandwidth(data, n);
0078     case nsl_kde_bandwidth_custom:
0079         break;
0080     }
0081 
0082     // return a small value to avoid division by zero by the caller,
0083     // should actually never happen since it's only relevant for
0084     // the case nsl_kde_bandwidth_custom which has to be handled by the caller anyway.
0085     return 1e-6;
0086 }
0087 
0088 double nsl_kde_silverman_bandwidth(double* data, int n) {
0089     gsl_sort(data, 1, n);
0090     const double sigma = gsl_stats_sd(data, 1, n);
0091     const double iqr = gsl_stats_quantile_from_sorted_data(data, 1, n, 0.75) - gsl_stats_quantile_from_sorted_data(data, 1, n, 0.25);
0092 
0093     return 0.9 * GSL_MIN(sigma, iqr / 1.34) * pow(n, -0.2);
0094 }
0095 
0096 double nsl_kde_scott_bandwidth(double* data, int n) {
0097     // gsl_sort(data, 1, n);
0098     const double sigma = gsl_stats_sd(data, 1, n);
0099 
0100     return 1.059 * sigma * pow(n, 0.2);
0101 }