File indexing completed on 2025-02-02 04:43:00

0001 /*
0002 SPDX-FileCopyrightText: 2003-2004 Mark Borgerding <Mark@Borgerding.net>
0003 SPDX-License-Identifier: BSD-3-Clause
0004 All rights reserved.
0005 
0006 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
0007 
0008     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
0009     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
0010     * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
0011 
0012 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0013 */
0014 
0015 #include "../_kiss_fft_guts.h"
0016 
0017 
0018 /*
0019  Some definitions that allow real or complex filtering
0020 */
0021 /*REAL_FASTFIR*/
0022 #define MIN_FFT_LEN 2048
0023 #include "kiss_fftr.h"
0024 typedef kiss_fft_scalar kffsamp_t;
0025 typedef kiss_fftr_cfg kfcfg_t;
0026 #define FFT_ALLOC kiss_fftr_alloc
0027 #define FFTFWD kiss_fftr
0028 #define FFTINV kiss_fftri
0029 
0030 typedef struct kiss_fastfir_state *kiss_fastfir_cfg;
0031 
0032 
0033 
0034 kiss_fastfir_cfg kiss_fastfir_alloc(const kffsamp_t * imp_resp,size_t n_imp_resp,
0035         size_t * nfft,void * mem,size_t*lenmem);
0036 
0037 /* see do_file_filter for usage */
0038 size_t kiss_fastfir( kiss_fastfir_cfg cfg, kffsamp_t * inbuf, kffsamp_t * outbuf, size_t n, size_t *offset);
0039 
0040 
0041 
0042 static int verbose=0;
0043 
0044 
0045 struct kiss_fastfir_state{
0046     size_t nfft;
0047     size_t ngood;
0048     kfcfg_t fftcfg;
0049     kfcfg_t ifftcfg;
0050     kiss_fft_cpx * fir_freq_resp;
0051     kiss_fft_cpx * freqbuf;
0052     size_t n_freq_bins;
0053     kffsamp_t * tmpbuf;
0054 };
0055 
0056 
0057 kiss_fastfir_cfg kiss_fastfir_alloc(
0058         const kffsamp_t * imp_resp,size_t n_imp_resp,
0059         size_t *pnfft, /* if <= 0, an appropriate size will be chosen */
0060         void * mem,size_t*lenmem)
0061 {
0062     kiss_fastfir_cfg st = NULL;
0063     size_t len_fftcfg,len_ifftcfg;
0064     size_t memneeded = sizeof(struct kiss_fastfir_state);
0065     char * ptr;
0066     size_t i;
0067     size_t nfft=0;
0068     float scale;
0069     int n_freq_bins;
0070     if (pnfft)
0071         nfft=*pnfft;
0072 
0073     if (nfft==0) {
0074         /* determine fft size as next power of two at least 2x 
0075          the impulse response length*/
0076         i=n_imp_resp-1;
0077         nfft=2;
0078         do{
0079              nfft<<=1;
0080         }while (i>>=1);
0081         if ( nfft < MIN_FFT_LEN )
0082             nfft=MIN_FFT_LEN;
0083     }
0084     if (pnfft)
0085         *pnfft = nfft;
0086 
0087     n_freq_bins = nfft/2 + 1;
0088     /*fftcfg*/
0089     FFT_ALLOC (nfft, 0, NULL, &len_fftcfg);
0090     memneeded += len_fftcfg;  
0091     /*ifftcfg*/
0092     FFT_ALLOC (nfft, 1, NULL, &len_ifftcfg);
0093     memneeded += len_ifftcfg;  
0094     /* tmpbuf */
0095     memneeded += sizeof(kffsamp_t) * nfft;
0096     /* fir_freq_resp */
0097     memneeded += sizeof(kiss_fft_cpx) * n_freq_bins;
0098     /* freqbuf */
0099     memneeded += sizeof(kiss_fft_cpx) * n_freq_bins;
0100     
0101     if (lenmem == NULL) {
0102         st = (kiss_fastfir_cfg) malloc (memneeded);
0103     } else {
0104         if (*lenmem >= memneeded)
0105             st = (kiss_fastfir_cfg) mem;
0106         *lenmem = memneeded;
0107     }
0108     if (!st)
0109         return NULL;
0110 
0111     st->nfft = nfft;
0112     st->ngood = nfft - n_imp_resp + 1;
0113     st->n_freq_bins = n_freq_bins;
0114     ptr=(char*)(st+1);
0115 
0116     st->fftcfg = (kfcfg_t)ptr;
0117     ptr += len_fftcfg;
0118 
0119     st->ifftcfg = (kfcfg_t)ptr;
0120     ptr += len_ifftcfg;
0121 
0122     st->tmpbuf = (kffsamp_t*)ptr;
0123     ptr += sizeof(kffsamp_t) * nfft;
0124 
0125     st->freqbuf = (kiss_fft_cpx*)ptr;
0126     ptr += sizeof(kiss_fft_cpx) * n_freq_bins;
0127     
0128     st->fir_freq_resp = (kiss_fft_cpx*)ptr;
0129     ptr += sizeof(kiss_fft_cpx) * n_freq_bins;
0130 
0131     FFT_ALLOC (nfft,0,st->fftcfg , &len_fftcfg);
0132     FFT_ALLOC (nfft,1,st->ifftcfg , &len_ifftcfg);
0133 
0134     memset(st->tmpbuf,0,sizeof(kffsamp_t)*nfft);
0135     /*zero pad in the middle to left-rotate the impulse response 
0136       This puts the scrap samples at the end of the inverse fft'd buffer */
0137     st->tmpbuf[0] = imp_resp[ n_imp_resp - 1 ];
0138     for (i=0;i<n_imp_resp - 1; ++i) {
0139         st->tmpbuf[ nfft - n_imp_resp + 1 + i ] = imp_resp[ i ];
0140     }
0141 
0142     FFTFWD(st->fftcfg,st->tmpbuf,st->fir_freq_resp);
0143 
0144     /* TODO: this won't work for fixed point */
0145     scale = 1.0 / st->nfft;
0146 
0147     for ( i=0; i < st->n_freq_bins; ++i ) {
0148 #ifdef USE_SIMD
0149         st->fir_freq_resp[i].r *= _mm_set1_ps(scale);
0150         st->fir_freq_resp[i].i *= _mm_set1_ps(scale);
0151 #else
0152         st->fir_freq_resp[i].r *= scale;
0153         st->fir_freq_resp[i].i *= scale;
0154 #endif
0155     }
0156     return st;
0157 }
0158 
0159 static void fastconv1buf(const kiss_fastfir_cfg st,const kffsamp_t * in,kffsamp_t * out)
0160 {
0161     size_t i;
0162     /* multiply the frequency response of the input signal by
0163      that of the fir filter*/
0164     FFTFWD( st->fftcfg, in , st->freqbuf );
0165     for ( i=0; i<st->n_freq_bins; ++i ) {
0166         kiss_fft_cpx tmpsamp; 
0167         C_MUL(tmpsamp,st->freqbuf[i],st->fir_freq_resp[i]);
0168         st->freqbuf[i] = tmpsamp;
0169     }
0170 
0171     /* perform the inverse fft*/
0172     FFTINV(st->ifftcfg,st->freqbuf,out);
0173 }
0174 
0175 /* n : the size of inbuf and outbuf in samples
0176    return value: the number of samples completely processed
0177    n-retval samples should be copied to the front of the next input buffer */
0178 static size_t kff_nocopy(
0179         kiss_fastfir_cfg st,
0180         const kffsamp_t * inbuf, 
0181         kffsamp_t * outbuf,
0182         size_t n)
0183 {
0184     size_t norig=n;
0185     while (n >= st->nfft ) {
0186         fastconv1buf(st,inbuf,outbuf);
0187         inbuf += st->ngood;
0188         outbuf += st->ngood;
0189         n -= st->ngood;
0190     }
0191     return norig - n;
0192 }
0193 
0194 static
0195 size_t kff_flush(kiss_fastfir_cfg st,const kffsamp_t * inbuf,kffsamp_t * outbuf,size_t n)
0196 {
0197     size_t zpad=0,ntmp;
0198 
0199     ntmp = kff_nocopy(st,inbuf,outbuf,n);
0200     n -= ntmp;
0201     inbuf += ntmp;
0202     outbuf += ntmp;
0203 
0204     zpad = st->nfft - n;
0205     memset(st->tmpbuf,0,sizeof(kffsamp_t)*st->nfft );
0206     memcpy(st->tmpbuf,inbuf,sizeof(kffsamp_t)*n );
0207     
0208     fastconv1buf(st,st->tmpbuf,st->tmpbuf);
0209     
0210     memcpy(outbuf,st->tmpbuf,sizeof(kffsamp_t)*( st->ngood - zpad ));
0211     return ntmp + st->ngood - zpad;
0212 }
0213 
0214 size_t kiss_fastfir(
0215         kiss_fastfir_cfg vst,
0216         kffsamp_t * inbuf,
0217         kffsamp_t * outbuf,
0218         size_t n_new,
0219         size_t *offset)
0220 {
0221     size_t ntot = n_new + *offset;
0222     if (n_new==0) {
0223         return kff_flush(vst,inbuf,outbuf,ntot);
0224     }else{
0225         size_t nwritten = kff_nocopy(vst,inbuf,outbuf,ntot);
0226         *offset = ntot - nwritten;
0227         /*save the unused or underused samples at the front of the input buffer */
0228         memcpy( inbuf , inbuf+nwritten , *offset * sizeof(kffsamp_t) );
0229         return nwritten;
0230     }
0231 }
0232 
0233 /*FAST_FILT_UTIL*/
0234 #include <unistd.h>
0235 #include <sys/types.h>
0236 #include <sys/mman.h>
0237 #include <assert.h>
0238 
0239 static
0240 void direct_file_filter(
0241         FILE * fin,
0242         FILE * fout,
0243         const kffsamp_t * imp_resp,
0244         size_t n_imp_resp)
0245 {
0246     size_t nlag = n_imp_resp - 1;
0247 
0248     const kffsamp_t *tmph;
0249     kffsamp_t *buf, *circbuf;
0250     kffsamp_t outval;
0251     size_t nread;
0252     size_t nbuf;
0253     size_t oldestlag = 0;
0254     size_t k, tap;
0255 
0256     nbuf = 4096;
0257     buf = (kffsamp_t *) malloc ( sizeof (kffsamp_t) * nbuf);
0258     circbuf = (kffsamp_t *) malloc (sizeof (kffsamp_t) * nlag);
0259     if (!circbuf || !buf) {
0260         perror("circbuf allocation");
0261         exit(1);
0262     }
0263 
0264     if ( fread (circbuf, sizeof (kffsamp_t), nlag, fin) !=  nlag ) {
0265         perror ("insufficient data to overcome transient");
0266         exit (1);
0267     }
0268 
0269     do {
0270         nread = fread (buf, sizeof (kffsamp_t), nbuf, fin);
0271         if (nread == 0)
0272             break;
0273 
0274         for (k = 0; k < nread; ++k) {
0275             tmph = imp_resp+nlag;
0276 /*REAL_FASTFIR*/
0277 # ifdef USE_SIMD
0278             outval = _mm_set1_ps(0);
0279 #else
0280             outval = 0;
0281 #endif
0282             for (tap = oldestlag; tap < nlag; ++tap)
0283                 outval += circbuf[tap] * *tmph--;
0284             for (tap = 0; tap < oldestlag; ++tap)
0285                 outval += circbuf[tap] * *tmph--;
0286             outval += buf[k] * *tmph;
0287 
0288             circbuf[oldestlag++] = buf[k];
0289             buf[k] = outval;
0290 
0291             if (oldestlag == nlag)
0292                 oldestlag = 0;
0293         }
0294 
0295         if (fwrite (buf, sizeof (buf[0]), nread, fout) != nread) {
0296             perror ("short write");
0297             exit (1);
0298         }
0299     } while (nread);
0300     free (buf);
0301     free (circbuf);
0302 }
0303 
0304 static
0305 void do_file_filter(
0306         FILE * fin,
0307         FILE * fout,
0308         const kffsamp_t * imp_resp,
0309         size_t n_imp_resp,
0310         size_t nfft )
0311 {
0312     int fdout;
0313     size_t n_samps_buf;
0314 
0315     kiss_fastfir_cfg cfg;
0316     kffsamp_t *inbuf,*outbuf;
0317     int nread;
0318     size_t idx_inbuf;
0319 
0320     fdout = fileno(fout);
0321 
0322     cfg=kiss_fastfir_alloc(imp_resp,n_imp_resp,&nfft,0,0);
0323 
0324     /* use length to minimize buffer shift*/
0325     n_samps_buf = nfft + 4*(nfft-n_imp_resp+1);
0326 
0327     if (verbose) fprintf(stderr,"bufsize=%d\n",sizeof(kffsamp_t)*n_samps_buf );
0328      
0329 
0330     /*allocate space and initialize pointers */
0331     inbuf = (kffsamp_t*)malloc(sizeof(kffsamp_t)*n_samps_buf);
0332     outbuf = (kffsamp_t*)malloc(sizeof(kffsamp_t)*n_samps_buf);
0333 
0334     idx_inbuf=0;
0335     do{
0336         /* start reading at inbuf[idx_inbuf] */
0337         nread = fread( inbuf + idx_inbuf, sizeof(kffsamp_t), n_samps_buf - idx_inbuf,fin );
0338 
0339         /* If nread==0, then this is a flush.
0340             The total number of samples in input is idx_inbuf + nread . */
0341         int nwrite = kiss_fastfir(cfg, inbuf, outbuf,nread,&idx_inbuf) * sizeof(kffsamp_t);
0342         /* kiss_fastfir moved any unused samples to the front of inbuf and updated idx_inbuf */
0343 
0344         if ( write(fdout, outbuf, nwrite) != nwrite ) {
0345             perror("short write");
0346             exit(1);
0347         }
0348     }while ( nread );
0349     free(cfg);
0350     free(inbuf);
0351     free(outbuf);
0352 }
0353 
0354 int main(int argc,char**argv)
0355 {
0356     kffsamp_t * h;
0357     int use_direct=0;
0358     size_t nh,nfft=0;
0359     FILE *fin=stdin;
0360     FILE *fout=stdout;
0361     FILE *filtfile=NULL;
0362     while (1) {
0363         int c=getopt(argc,argv,"n:h:i:o:vd");
0364         if (c==-1) break;
0365         switch (c) {
0366             case 'v':
0367                 verbose=1;
0368                 break;
0369             case 'n':
0370                 nfft=atoi(optarg);
0371                 break;
0372             case 'i':
0373                 fin = fopen(optarg,"rb");
0374                 if (fin==NULL) {
0375                     perror(optarg);
0376                     exit(1);
0377                 }
0378                 break;
0379             case 'o':
0380                 fout = fopen(optarg,"w+b");
0381                 if (fout==NULL) {
0382                     perror(optarg);
0383                     exit(1);
0384                 }
0385                 break;
0386             case 'h':
0387                 filtfile = fopen(optarg,"rb");
0388                 if (filtfile==NULL) {
0389                     perror(optarg);
0390                     exit(1);
0391                 }
0392                 break;
0393             case 'd':
0394                 use_direct=1;
0395                 break;
0396             case '?':
0397                      fprintf(stderr,"usage options:\n"
0398                             "\t-n nfft: fft size to use\n"
0399                             "\t-d : use direct FIR filtering, not fast convolution\n"
0400                             "\t-i filename: input file\n"
0401                             "\t-o filename: output(filtered) file\n"
0402                             "\t-n nfft: fft size to use\n"
0403                             "\t-h filename: impulse response\n");
0404                      exit (1);
0405             default:fprintf(stderr,"bad %c\n",c);break;
0406         }
0407     }
0408     if (filtfile==NULL) {
0409         fprintf(stderr,"You must supply the FIR coeffs via -h\n");
0410         exit(1);
0411     }
0412     fseek(filtfile,0,SEEK_END);
0413     nh = ftell(filtfile) / sizeof(kffsamp_t);
0414     if (verbose) fprintf(stderr,"%d samples in FIR filter\n",(int)nh);
0415     h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh);
0416     fseek(filtfile,0,SEEK_SET);
0417     fread(h,sizeof(kffsamp_t),nh,filtfile);
0418     fclose(filtfile);
0419  
0420     if (use_direct)
0421         direct_file_filter( fin, fout, h,nh);
0422     else
0423         do_file_filter( fin, fout, h,nh,nfft);
0424 
0425     if (fout!=stdout) fclose(fout);
0426     if (fin!=stdin) fclose(fin);
0427 
0428     free(h);
0429     return 0;
0430 }