File indexing completed on 2024-05-12 03:47:50
0001 /* 0002 File : nsl_baseline.c 0003 Project : LabPlot 0004 Description : NSL baseline detection and subtraction functions 0005 -------------------------------------------------------------------- 0006 SPDX-FileCopyrightText: 2023 Stefan Gerlach <stefan.gerlach@uni.kn> 0007 SPDX-License-Identifier: GPL-2.0-or-later 0008 */ 0009 0010 #include "nsl_baseline.h" 0011 #include "nsl_stats.h" 0012 0013 #include <QtGlobal> 0014 0015 #include <gsl/gsl_fit.h> 0016 #include <gsl/gsl_statistics_double.h> 0017 0018 #ifdef HAVE_EIGEN3 0019 #pragma GCC diagnostic push 0020 #pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant" 0021 #include <Eigen/Sparse> 0022 #include <Eigen/SparseCholesky> 0023 #pragma GCC diagnostic pop 0024 #else // GSL 0025 #include <gsl/gsl_linalg.h> 0026 #include <gsl/gsl_sort.h> 0027 #include <gsl/gsl_spblas.h> 0028 #include <gsl/gsl_splinalg.h> 0029 #include <gsl/gsl_spmatrix.h> 0030 0031 #include <cstring> // memcpy 0032 #endif 0033 0034 #include <iostream> 0035 0036 void nsl_baseline_remove_minimum(double* data, const size_t n) { 0037 const double min = nsl_stats_minimum(data, n, nullptr); 0038 0039 for (size_t i = 0; i < n; i++) 0040 data[i] -= min; 0041 } 0042 0043 void nsl_baseline_remove_maximum(double* data, const size_t n) { 0044 const double max = nsl_stats_maximum(data, n, nullptr); 0045 0046 for (size_t i = 0; i < n; i++) 0047 data[i] -= max; 0048 } 0049 0050 void nsl_baseline_remove_mean(double* data, const size_t n) { 0051 const double mean = gsl_stats_mean(data, 1, n); 0052 0053 for (size_t i = 0; i < n; i++) 0054 data[i] -= mean; 0055 } 0056 0057 void nsl_baseline_remove_median(double* data, const size_t n) { 0058 // copy data 0059 double* tmp_data = (double*)malloc(n * sizeof(double)); 0060 if (!tmp_data) 0061 return; 0062 memcpy(tmp_data, data, n * sizeof(double)); 0063 0064 const double median = gsl_stats_median(tmp_data, 1, n); // rearranges tmp_data 0065 // printf("MEDIAN = %g\n", median); 0066 0067 for (size_t i = 0; i < n; i++) 0068 data[i] -= median; 0069 0070 free(tmp_data); 0071 } 0072 0073 /* do a linear interpolation using first and last point and substract that */ 0074 int nsl_baseline_remove_endpoints(double* xdata, double* ydata, const size_t n) { 0075 // not possible 0076 if (xdata[0] == xdata[n - 1]) 0077 return -1; 0078 0079 for (size_t i = 0; i < n; i++) { 0080 // y = y1 + (x-x1)*(y2-y1)/(x2-x1) 0081 const double y = ydata[0] + (xdata[i] - xdata[0]) * (ydata[n - 1] - ydata[0]) / (xdata[n - 1] - xdata[0]); 0082 ydata[i] -= y; 0083 } 0084 0085 return 0; 0086 } 0087 0088 /* do a linear regression and substract that */ 0089 int nsl_baseline_remove_linreg(double* xdata, double* ydata, const size_t n) { 0090 double c0, c1, cov00, cov01, cov11, chisq; 0091 gsl_fit_linear(xdata, 1, ydata, 1, n, &c0, &c1, &cov00, &cov01, &cov11, &chisq); 0092 0093 for (size_t i = 0; i < n; i++) { 0094 double y, y_err; 0095 gsl_fit_linear_est(xdata[i], c0, c1, cov00, cov01, cov11, &y, &y_err); 0096 ydata[i] -= y; 0097 } 0098 0099 return 0; 0100 } 0101 0102 // Eigen3 version of ARPLS 0103 /* see https://pubs.rsc.org/en/content/articlelanding/2015/AN/C4AN01061B#!divAbstract */ 0104 double nsl_baseline_remove_arpls_Eigen3(double* data, const size_t n, double p, double lambda, int niter) { 0105 double crit = 1.; 0106 0107 #ifdef HAVE_EIGEN3 0108 typedef Eigen::SparseMatrix<double> SMat; // declares a column-major sparse matrix type of double 0109 typedef Eigen::SparseVector<double> SVec; // declares a sparse vector type of double 0110 0111 Eigen::SparseMatrix<double> D(n, n - 2); 0112 for (size_t i = 0; i < n; ++i) { 0113 for (size_t j = 0; j < n - 2; ++j) { 0114 if (i == j) 0115 D.insert(i, j) = 1.; 0116 if (i == j + 1) 0117 D.insert(i, j) = -2.; 0118 if (i == j + 2) 0119 D.insert(i, j) = 1.; 0120 } 0121 } 0122 // std::cout << "D =" << std::endl << D << std::endl; 0123 0124 // H = lambda * D.dot(D.T) 0125 SMat H = lambda * D * D.transpose(); 0126 // std::cout << "H =" << std::endl << H << std::endl; 0127 0128 // weights 0129 SVec w(n); 0130 SMat W(n, n); 0131 for (size_t i = 0; i < n; ++i) { 0132 w.insert(i) = 1.; 0133 W.insert(i, i) = 1.; 0134 } 0135 0136 SVec d(n), z(n); // data, solution (initial guess: data) 0137 for (size_t i = 0; i < n; i++) { 0138 d.insert(i) = data[i]; 0139 z.insert(i) = data[i]; 0140 } 0141 0142 int count = 0; 0143 while (crit > p) { 0144 printf("iteration %d:", count); 0145 0146 // solve (W+H)z = W*data 0147 Eigen::SimplicialLDLT<SMat> solver; 0148 solver.compute(W + H); 0149 if (solver.info() != Eigen::Success) 0150 puts("compute(): decomposition failed\n"); 0151 0152 z = solver.solve(W * d); 0153 if (solver.info() != Eigen::Success) 0154 puts("solve(): solving failed\n"); 0155 0156 // std::cout << "z = " << z << std::endl; 0157 0158 SVec diff = d - z; 0159 // std::cout << "diff = " << diff << std::endl; 0160 0161 // mean and stdev of negative diff values 0162 double m = 0.; 0163 int num = 0; 0164 for (SVec::InnerIterator it(diff); it; ++it) { 0165 double v = it.value(); 0166 if (v < 0) { 0167 m += v; 0168 num++; 0169 } 0170 } 0171 if (num > 0) 0172 m /= num; 0173 // printf("m = %g\n", m); 0174 double s = 0.; 0175 for (SVec::InnerIterator it(diff); it; ++it) { 0176 double v = it.value(); 0177 if (v < 0) 0178 s += (v - m) * (v - m); 0179 } 0180 if (num > 0) 0181 s /= num; 0182 s = sqrt(s); 0183 // printf("s = %g\n", s); 0184 0185 // w_new = 1 / (1 + np.exp(2 * (d - (2*s - m))/s)) 0186 SVec wn(n); 0187 for (size_t i = 0; i < n; ++i) 0188 wn.insert(i) = 1. / (1. + exp(2. * (diff.coeffRef(i) - (2. * s - m)) / s)); 0189 // std::cout << "wnvec = " << wnvec << std::endl; 0190 0191 // crit = norm(w_new - w) / norm(w) 0192 crit = (wn - w).norm() / w.norm(); 0193 printf(" crit = %.15g\n", crit); 0194 0195 w = wn; 0196 // W.setdiag(w) 0197 for (size_t i = 0; i < n; ++i) 0198 W.coeffRef(i, i) = wn.coeffRef(i); 0199 // std::cout << "Wmat = " << Wmat << std::endl; 0200 0201 count++; 0202 0203 if (count > niter) { 0204 puts("Maximum number of iterations reached"); 0205 break; 0206 } 0207 } 0208 0209 for (size_t i = 0; i < n; ++i) 0210 data[i] -= z.coeffRef(i); 0211 #else 0212 Q_UNUSED(data); 0213 Q_UNUSED(n); 0214 Q_UNUSED(p); 0215 Q_UNUSED(lambda); 0216 Q_UNUSED(niter); 0217 #endif 0218 0219 return crit; 0220 } 0221 0222 #ifndef HAVE_EIGEN3 0223 void show_matrix(gsl_spmatrix* M, size_t n, size_t m, char name) { 0224 printf("%c:\n", name); 0225 // for (size_t i = 0; i < n; ++i) { 0226 for (size_t i = 0; i < 5; ++i) { 0227 // for (size_t j = 0; j < m; ++j) 0228 for (size_t j = 0; j < 5; ++j) 0229 printf("%f ", gsl_spmatrix_get(M, i, j)); 0230 puts("\n"); 0231 } 0232 for (size_t i = n - 5; i < n; ++i) { 0233 for (size_t j = m - 5; j < m; ++j) 0234 printf("%f ", gsl_spmatrix_get(M, i, j)); 0235 puts("\n"); 0236 } 0237 } 0238 void show_vector(gsl_vector* v, size_t n, char name) { 0239 printf("%c:\n", name); 0240 for (size_t i = 0; i < 5; ++i) 0241 printf("%g ", gsl_vector_get(v, i)); 0242 printf(" .. "); 0243 for (size_t i = n - 5; i < n; ++i) 0244 printf("%g ", gsl_vector_get(v, i)); 0245 puts("\n"); 0246 } 0247 #endif 0248 0249 // GSL version of ARPLS (much slower than Eigen3 version) 0250 double nsl_baseline_remove_arpls_GSL(double* data, const size_t n, double p, double lambda, int niter) { 0251 double crit = 1.; 0252 0253 #ifndef HAVE_EIGEN3 0254 gsl_spmatrix* D = gsl_spmatrix_alloc(n, n - 2); 0255 for (size_t i = 0; i < n; ++i) { 0256 for (size_t j = 0; j < n - 2; ++j) { 0257 if (i == j) 0258 gsl_spmatrix_set(D, i, j, 1.); 0259 if (i == j + 1) 0260 gsl_spmatrix_set(D, i, j, -2.); 0261 if (i == j + 2) 0262 gsl_spmatrix_set(D, i, j, 1.); 0263 } 0264 } 0265 // show_matrix(D, n, n-2, 'D'); 0266 0267 // H = lambda * D.dot(D.T) 0268 gsl_spmatrix* DT = gsl_spmatrix_alloc(n - 2, n); 0269 gsl_spmatrix_transpose_memcpy(DT, D); 0270 0271 gsl_spmatrix* H = gsl_spmatrix_alloc_nzmax(n, n, 5 * n, GSL_SPMATRIX_CSC); 0272 // requires compressed format 0273 gsl_spmatrix* DD = gsl_spmatrix_ccs(D); 0274 gsl_spmatrix* DDT = gsl_spmatrix_ccs(DT); 0275 gsl_spblas_dgemm(lambda, DD, DDT, H); 0276 // show_matrix(H, n, n, 'H'); 0277 gsl_spmatrix_free(D); 0278 gsl_spmatrix_free(DT); 0279 0280 // weights 0281 gsl_vector* w = gsl_vector_alloc(n); 0282 gsl_spmatrix* W = gsl_spmatrix_alloc_nzmax(n, n, n, GSL_SPMATRIX_COO); 0283 for (size_t i = 0; i < n; ++i) { 0284 gsl_vector_set(w, i, 1.); 0285 gsl_spmatrix_set(W, i, i, 1.); 0286 } 0287 // show_matrix(W, n, n, 'W'); 0288 gsl_spmatrix* WW = gsl_spmatrix_ccs(W); 0289 0290 // loop 0291 gsl_vector* d = gsl_vector_alloc(n); // data 0292 for (size_t i = 0; i < n; i++) 0293 gsl_vector_set(d, i, data[i]); 0294 gsl_vector* z = gsl_vector_alloc(n); // solution 0295 /* initial guess z */ 0296 for (size_t i = 0; i < n; i++) 0297 gsl_vector_set(z, i, data[i]); 0298 // gsl_vector_set(z, i, 0); 0299 gsl_vector* diff = gsl_vector_alloc(n); // diff 0300 gsl_vector* w_new = gsl_vector_alloc(n); 0301 0302 gsl_spmatrix* A = gsl_spmatrix_alloc_nzmax(n, n, 5 * n, GSL_SPMATRIX_CSC); 0303 // gsl_spmatrix* C = gsl_spmatrix_alloc(n, n); 0304 // gsl_spmatrix* A = gsl_spmatrix_ccs(C); 0305 gsl_matrix* AA = gsl_matrix_alloc(n, n); 0306 gsl_vector* b = gsl_vector_alloc(n); 0307 gsl_permutation* per = gsl_permutation_alloc(n); 0308 int signum; 0309 // const gsl_splinalg_itersolve_type *T = gsl_splinalg_itersolve_gmres; 0310 // gsl_splinalg_itersolve *work = gsl_splinalg_itersolve_alloc(T, n, 0); 0311 int count = 0; 0312 while (crit > p) { 0313 printf("iteration %d\n", count); 0314 0315 // solve (W+H)z = W*data 0316 0317 // Az=b 0318 gsl_spmatrix_add(A, WW, H); 0319 if (count == 0) 0320 show_matrix(A, n, n, 'A'); 0321 0322 gsl_spblas_dgemv(CblasNoTrans, 1., W, d, 0., b); 0323 if (count == 0) 0324 show_vector(b, n, 'b'); 0325 0326 gsl_spmatrix_sp2d(AA, A); 0327 0328 // LU 0329 gsl_linalg_LU_decomp(AA, per, &signum); 0330 gsl_linalg_LU_solve(AA, per, b, z); 0331 0332 // LDLT: slower 0333 // gsl_linalg_ldlt_decomp(AA); 0334 // gsl_linalg_ldlt_solve(AA, b, z); 0335 // Householder: slowest 0336 // gsl_linalg_HH_solve(AA, b, z); 0337 // sparse iterative solver: not working 0338 /* int iter = 0, maxiter = 10, status; 0339 double tol = 1.0e-3; 0340 do { 0341 status = gsl_splinalg_itersolve_iterate(A, b, tol, z, work); 0342 0343 double residual = gsl_splinalg_itersolve_normr(work); 0344 fprintf(stderr, "iter %d residual = %.12e\n", iter, residual); 0345 } while (status == GSL_CONTINUE && ++iter < maxiter); 0346 */ 0347 // show_vector(z, n, 'z'); 0348 0349 for (size_t i = 0; i < n; ++i) 0350 gsl_vector_set(diff, i, gsl_vector_get(d, i) - gsl_vector_get(z, i)); 0351 // show_vector(diff, n, 'D'); 0352 0353 // mean and stdev of negative diffs 0354 double m = 0.; 0355 int num = 0; 0356 for (size_t i = 0; i < n; ++i) { 0357 double v = gsl_vector_get(diff, i); 0358 if (v < 0) { 0359 m += v; 0360 num++; 0361 } 0362 } 0363 if (num > 0) 0364 m /= num; 0365 // printf("m = %g\n", m); 0366 double s = 0.; 0367 for (size_t i = 0; i < n; ++i) { 0368 double v = gsl_vector_get(diff, i); 0369 if (v < 0) 0370 s += gsl_pow_2(v - m); 0371 } 0372 if (num > 0) 0373 s /= num; 0374 s = sqrt(s); 0375 // printf("s = %g\n", s); 0376 0377 // w_new = 1 / (1 + np.exp(2 * (d - (2*s - m))/s)) 0378 for (size_t i = 0; i < n; ++i) 0379 gsl_vector_set(w_new, i, 1. / (1. + exp(2. * (gsl_vector_get(diff, i) - (2. * s - m)) / s))); 0380 // show_vector(w_new, n, 'n'); 0381 0382 // crit = norm(w_new - w) / norm(w) 0383 double norm = 0.; 0384 for (size_t i = 0; i < n; ++i) { 0385 norm += gsl_pow_2(gsl_vector_get(w_new, i) - gsl_vector_get(w, i)); 0386 } 0387 norm = sqrt(norm); 0388 crit = norm / gsl_blas_dnrm2(w); 0389 // printf("norm (w_new - w) = %g\n", norm); 0390 // printf("norm (w) = %g\n", gsl_blas_dnrm2(w)); 0391 printf("crit = %g\n", crit); 0392 0393 // w = w_new 0394 gsl_vector_memcpy(w, w_new); 0395 // W.setdiag(w) 0396 for (size_t i = 0; i < n; ++i) 0397 gsl_spmatrix_set(W, i, i, gsl_vector_get(w_new, i)); 0398 // show_matrix(W, n, n, 'W'); 0399 WW = gsl_spmatrix_ccs(W); 0400 0401 count++; 0402 0403 if (count > niter) { 0404 puts("Maximum number of iterations reached"); 0405 break; 0406 } 0407 } 0408 0409 for (size_t i = 0; i < n; ++i) { 0410 // printf("%.15g ", data[i] - gsl_vector_get(z, i)); 0411 data[i] -= gsl_vector_get(z, i); 0412 } 0413 // puts("\n"); 0414 0415 // gsl_splinalg_itersolve_free(work); 0416 gsl_matrix_free(AA); 0417 gsl_spmatrix_free(A); 0418 gsl_vector_free(b); 0419 gsl_spmatrix_free(H); 0420 gsl_vector_free(w); 0421 gsl_spmatrix_free(W); 0422 gsl_vector_free(w_new); 0423 gsl_vector_free(diff); 0424 gsl_vector_free(z); 0425 gsl_vector_free(d); 0426 #else 0427 Q_UNUSED(data); 0428 Q_UNUSED(n); 0429 Q_UNUSED(p); 0430 Q_UNUSED(lambda); 0431 Q_UNUSED(niter); 0432 #endif 0433 0434 return crit; 0435 } 0436 0437 double nsl_baseline_remove_arpls(double* data, const size_t n, double p, double lambda, int niter) { 0438 // default values 0439 if (p == 0) 0440 p = 0.001; 0441 if (lambda == 0) 0442 lambda = 1.e4; 0443 if (niter == 0) 0444 niter = 10; 0445 0446 #ifdef HAVE_EIGEN3 0447 return nsl_baseline_remove_arpls_Eigen3(data, n, p, lambda, niter); 0448 #endif 0449 0450 return nsl_baseline_remove_arpls_GSL(data, n, p, lambda, niter); 0451 }