FastDDC code added, and now it builds!

ha7ilm · ha7ilm · commit edc2c21e436e · 2015-11-07T19:21:08.000+01:00
diff --git a/csdr.c b/csdr.c
@@ -48,6 +48,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "ima_adpcm.h"
 #include <sched.h>
 #include <math.h>
+#include <strings.h>
+#include "fastddc.h"
 
 char usage[]=
 "csdr - a simple commandline tool for Software Defined Radio receiver DSP.\n\n"
@@ -1269,7 +1271,6 @@ int main(int argc, char *argv[])
 		float high_cut;
 		float transition_bw;
 		window_t window = WINDOW_DEFAULT;
-		char window_string[256]; //TODO: nice buffer overflow opportunity 
 		
 		int fd;
 		if(fd=init_fifo(argc,argv))
@@ -1646,6 +1647,105 @@ int main(int argc, char *argv[])
 		}		
 	}
 
+	if( !strcmp(argv[1],"fastddc_fwd_cc") ) //<decimation> [transition_bw [window]]
+	{	
+		int decimation;
+		if(argc<=2) return badsyntax("need required parameter (decimation)");
+		sscanf(argv[2],"%d",&decimation);
+		
+		float transition_bw = 0.05;
+		if(argc>=3) sscanf(argv[3],"%g",&transition_bw);
+
+		window_t window = WINDOW_DEFAULT;
+		if(argc>=4)	window=firdes_get_window_from_string(argv[5]);
+		else fprintf(stderr,"fastddc_fwd_cc: window = %s\n",firdes_get_string_from_window(window));
+
+		fastddc_t ddc; 
+		if(fastddc_init(&ddc, transition_bw, decimation, 0)) { badsyntax("error in fastddc_init()"); return 1; }
+		fastddc_print(&ddc);
+
+		if(!initialize_buffers()) return -2;
+		sendbufsize(ddc.fft_size);
+
+		//make FFT plan
+		complexf* input = 	 (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_size);
+		complexf* windowed = (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_size);
+		complexf* output =   (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_size);
+
+		for(int i=0;i<ddc.fft_size;i++) iof(input,i)=qof(input,i)=0; //null the input buffer
+
+		int benchmark = 1; 
+		if(benchmark) fprintf(stderr,"fastddc_fwd_cc: benchmarking FFT...");
+		FFT_PLAN_T* plan=make_fft_c2c(ddc.fft_size, windowed, output, 1, benchmark);
+		if(benchmark) fprintf(stderr," done\n");
+
+		for(;;)
+		{
+			FEOF_CHECK;
+			//overlapped FFT
+			for(int i=0;i<ddc.overlap_length;i++) input[i]=input[i+ddc.input_size];
+			fread(input+ddc.overlap_length, sizeof(complexf), ddc.input_size, stdin);
+			apply_window_c(input,windowed,ddc.fft_size,window);
+			fft_execute(plan);
+			fwrite(output, sizeof(complexf), ddc.fft_size, stdout);
+			TRY_YIELD;
+		}
+	}
+
+	if( !strcmp(argv[1],"fastddc_apply_cc") ) //<decimation> <shift_rate> [transition_bw [window]]
+	{	
+		int decimation;
+		if(argc<=2) return badsyntax("need required parameter (decimation)");
+		sscanf(argv[2],"%d",&decimation);
+
+		float shift_rate;
+		if(argc>=3) sscanf(argv[3],"%g",&shift_rate);		
+
+		float transition_bw = 0.05;
+		if(argc>=4) sscanf(argv[4],"%g",&transition_bw);
+
+		window_t window = WINDOW_DEFAULT;
+		if(argc>=5)	window=firdes_get_window_from_string(argv[5]);
+		else fprintf(stderr,"fastddc_apply_cc: window = %s\n",firdes_get_string_from_window(window));
+
+		fastddc_t ddc; 
+		if(fastddc_init(&ddc, transition_bw, decimation, shift_rate)) { badsyntax("error in fastddc_init()"); return 1; }
+		fastddc_print(&ddc);
+
+		if(!initialize_buffers()) return -2;
+		sendbufsize(ddc.output_size);
+
+		//prepare making the filter and doing FFT on it
+		complexf* taps=(complexf*)calloc(sizeof(complexf),ddc.fft_size); //initialize to zero
+		complexf* taps_fft=(complexf*)malloc(sizeof(complexf)*ddc.fft_size);
+		FFT_PLAN_T* plan_taps = make_fft_c2c(ddc.fft_size, taps, taps_fft, 1, 0); //forward, don't benchmark (we need this only once)
+
+		//make the filter
+		float filter_half_bw = 0.5/decimation;
+		firdes_bandpass_c(taps, ddc.taps_real_length, shift_rate-filter_half_bw, shift_rate+filter_half_bw, window);
+		fft_execute(plan_taps);
+
+		//make FFT plan
+		complexf* input = 	 (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_size);
+		complexf* output =   (complexf*)fft_malloc(sizeof(complexf)*ddc.output_size);
+
+		int benchmark = 1; 
+		if(benchmark) fprintf(stderr,"fastddc_apply_cc: benchmarking FFT...");
+		FFT_PLAN_T* plan_inverse = make_fft_c2c(ddc.fft_size, input, output, 0, 1); //inverse, do benchmark
+		if(benchmark) fprintf(stderr," done\n");
+
+		decimating_shift_addition_status_t shift_stat;
+		bzero(&shift_stat, sizeof(shift_stat));
+		for(;;)
+		{
+			FEOF_CHECK;
+			fread(input, sizeof(complexf), ddc.fft_size, stdin);
+			shift_stat = fastddc_apply_cc(input, output, &ddc, plan_inverse, taps_fft, shift_stat);
+			fwrite(output, sizeof(complexf), ddc.output_size, stdout);
+			TRY_YIELD;
+		}
+	}
+
 	if(!strcmp(argv[1],"none"))
 	{
 		return 0;
diff --git a/fastddc.c b/fastddc.c
@@ -0,0 +1,125 @@
+/*
+This software is part of libcsdr, a set of simple DSP routines for 
+Software Defined Radio.
+
+Copyright (c) 2014, Andras Retzler <randras@sdr.hu>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL ANDRAS RETZLER BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "fastddc.h"
+
+//DDC implementation based on:
+//http://www.3db-labs.com/01598092_MultibandFilterbank.pdf
+
+inline int is_integer(float a) { return floorf(a) == a; }
+
+int fastddc_init(fastddc_t* ddc, float transition_bw, int decimation, float shift_rate)
+{
+	ddc->pre_decimation = 1; //this will be done in the frequency domain
+	ddc->post_decimation = decimation; //this will be done in the time domain
+	while( is_integer((float)ddc->post_decimation/2) && ddc->post_decimation/2 != 1) 
+	{
+		ddc->post_decimation/=2;
+		ddc->pre_decimation*=2;
+	}
+	ddc->taps_real_length = firdes_filter_len(transition_bw); //the number of non-zero taps
+	ddc->taps_length = ceil(ddc->taps_real_length/(float)ddc->pre_decimation) * ddc->pre_decimation; //the number of taps must be a multiple of the decimation factor
+	ddc->fft_size = next_pow2(ddc->taps_length * 4); //it is a good rule of thumb for performance (based on the article), but we should do benchmarks
+	while (ddc->fft_size<ddc->pre_decimation) ddc->fft_size*=2; //fft_size should be a multiple of pre_decimation
+	ddc->overlap_length = ddc->taps_length - 1;
+	ddc->input_size = ddc->fft_size - ddc->overlap_length;
+	ddc->fft_inv_size = ddc->fft_size / ddc->pre_decimation;
+
+	//Shift operation in the frequency domain: we can shift by a multiple of v.
+	ddc->v = ddc->fft_size/ddc->overlap_length; //+-1 ? (or maybe ceil() this?) //TODO: why?
+	int middlebin=ddc->fft_size / 2;
+	ddc->startbin = middlebin + middlebin * shift_rate * 2;
+	ddc->startbin = ddc->v * round( ddc->startbin / (float)ddc->v );
+	ddc->offsetbin = ddc->startbin - middlebin;
+	ddc->post_shift = ((float)ddc->offsetbin/ddc->fft_size) - shift_rate;
+	ddc->pre_shift = ddc->offsetbin * ddc->v;
+
+	//Overlap is scraped, not added
+	ddc->scrape=ddc->overlap_length/ddc->pre_decimation;
+	ddc->output_size=ddc->fft_inv_size-ddc->scrape;
+
+	return ddc->fft_size<=2; //returns true on error
+}
+
+
+void fastddc_print(fastddc_t* ddc)
+{
+	fprintf(stderr,
+		"fastddc_print_sizes(): (fft_size = %d) = (taps_length = %d) + (input_size = %d) - 1\n"
+		"\t(overlap_length = %d) = taps_length - 1, taps_real_length = %d\n"
+		"\tdecimation = (pre_decimation = %d) * (post_decimation = %d), fft_inv_size = %d\n"
+		"\tstartbin = %d, offsetbin = %d, v = %d, pre_shift = %g, post_shift = %g\n"
+		, 
+		ddc->fft_size, ddc->taps_length, ddc->input_size, 
+		ddc->overlap_length, ddc->taps_real_length,
+		ddc->pre_decimation, ddc->post_decimation, ddc->fft_inv_size,
+		ddc->startbin, ddc->offsetbin, ddc->v, ddc->pre_shift, ddc->post_shift );
+}
+
+decimating_shift_addition_status_t fastddc_apply_cc(complexf* input, complexf* output, fastddc_t* ddc, FFT_PLAN_T* plan_inverse, complexf* taps_fft, decimating_shift_addition_status_t shift_stat)
+{
+	//implements DDC by using the overlap & scrape method
+	//TODO: +/-1s on overlap_size et al
+	//input shoud have ddc->fft_size number of elements
+
+	complexf* inv_input = plan_inverse->input;
+	complexf* inv_output = plan_inverse->output;
+
+	//Initialize buffers for inverse FFT to zero
+	for(int i=0;i<plan_inverse->size;i++)
+	{
+		iof(inv_input,i)=0;
+		qof(inv_input,i)=0;
+	}
+
+	//Alias & shift & filter at once
+	// * no, we won't break this algorithm to parts that are easier to understand: now we go for speed
+	for(int i=0;i<ddc->fft_size;i++)
+	{
+		int output_index = (ddc->startbin+i)%plan_inverse->size;
+		int tap_index = (ddc->fft_size+i-ddc->offsetbin)%ddc->fft_size;
+		cmultadd(inv_input+output_index, input+i, taps_fft+tap_index); //cmultadd(output, input1, input2):   complex output += complex input1 * complex input 2
+	}
+
+	fft_execute(plan_inverse);
+
+	//Normalize data
+	for(int i=0;i<plan_inverse->size;i++) //@apply_ddc_fft_cc: normalize by size
+	{
+		iof(inv_output,i)/=plan_inverse->size;
+		qof(inv_output,i)/=plan_inverse->size;
+	}
+	
+	//Overlap is scraped, not added
+	//Shift correction
+	shift_addition_data_t dsadata=decimating_shift_addition_init(ddc->post_shift, ddc->post_decimation); //this could be optimized (passed as parameter), but we would not win too much at all 
+	shift_stat=decimating_shift_addition_cc(plan_inverse->output+ddc->scrape, output, ddc->output_size, dsadata, ddc->post_decimation, shift_stat);
+	return shift_stat;
+}
diff --git a/fastddc.h b/fastddc.h
@@ -0,0 +1,27 @@
+#include <math.h>
+#include "libcsdr.h"
+#include "libcsdr_gpl.h"
+
+typedef struct fastddc_s
+{
+	int pre_decimation;
+	int post_decimation;
+	int taps_length; 
+	int taps_real_length;
+	int overlap_length; //it is taps_length - 1
+	int fft_size;
+	int fft_inv_size;
+	int input_size;
+	int output_size;
+	float pre_shift;
+	int startbin; //for pre_shift
+	int v; //step for pre_shift
+	int offsetbin;
+	float post_shift;
+	int output_scrape;
+	int scrape;
+} fastddc_t;
+
+int fastddc_init(fastddc_t* ddc, float transition_bw, int decimation, float shift_rate);
+decimating_shift_addition_status_t fastddc_apply_cc(complexf* input, complexf* output, fastddc_t* ddc, FFT_PLAN_T* plan_inverse, complexf* taps_fft, decimating_shift_addition_status_t shift_stat);
+void fastddc_print(fastddc_t* ddc);
diff --git a/libcsdr_wrapper.c b/libcsdr_wrapper.c
@@ -1,4 +1,5 @@
 #include "libcsdr.c"
 #include "libcsdr_gpl.c"
 #include "ima_adpcm.c"
+#include "fastddc.c"
 //this wrapper helps parsevect.py to generate better output