I have no specific questions or requests for comment here. I thought some peeps here may be interested.
In another thread I started a project which involves designing feedback control for a switched power converter. My design involves computing the feedback by applying a number of "axpy" (a*x + y) operations. I wanted to see if I could customize my own fixed point computation that might be faster and more compact than either built-in floating point or fixed point.
For the program I set up, I ran code for all three on my simulator and checked the time to evaluate "mycontrol()". Here are the results. Next to the size (in bytes of just the support code) and time (in seconds) are ratios wrt floating point. N
float pt: size= 734 1.00 time=0.00191284 1.00
fixed pt: size=1186 1.62 time=0.00169482 0.89
my fixed pt: size= 146 0.20 time=0.00147237 0.77
Note that the gcc/avr-libc fixed point code is a bit larger than the floating point code. Also, the performance of the floating point code is not grossly worse than the fixed point. However, the times include all the "glue" code between binary operations (e.g., function calls, looping, etc). I should probably try to isolate the binary operation times.
Here is the main program and my "axpy" code (which I call ypax, because it's more efficient to keep y first). In avr-gcc, long fract is 0.31, long accum is 32.31; in my implementation I use 0.23 for fract and 8.23 for accum, where the notation N.R (e.g., 32.31) indicates N bits for the integer part and R bits for the fractional part.
main.c and fixpt.S follow. I have done testing on the fixpt code but not 100% confident it is fully working.
#include <stdio.h> #include <stdint.h> #include <stdfix.h> #define USE_FIXED 1 int32_t fx32ypax(int32_t y, long fract *a, long fract *x); float fl32ypax(float y, float *a, float *x) { return y + (*a)*(*x); } union u_fract { long fract v; struct { uint8_t d, c, b; int8_t a; }; }; union u_accum { int32_t v; struct { uint8_t d, c, b; int8_t a; }; }; static inline int32_t fx32accum(long fract f) { union u_fract x; union u_accum y; x.v = f; y.a = 0; y.b = x.a; y.c = x.b; y.d = x.c; return y.v; } static inline long fract accum2fx32(int32_t a) { union u_accum x; union u_fract y; x.v = a; y.a = x.b; y.b = x.c; y.c = x.d; y.d = 0; return y.v; } static inline int8_t fx32toi8(long fract f) { union u_fract y; y.v = f; return y.d; } #if USE_FIXED #if 1 /* my stuff */ #define myfract long fract #define myaccum uint32_t #define myfrtoac fx32accum #define myactofr accum2fx32 #define myfrtoi8 fx32toi8 #define ypax fx32ypax #else /* avr-libc */ #define myfract long fract #define myaccum long accum #define myfrtoac(X) (X) #define myactofr(X) (X) #define myfrtoi8(X) ((int8_t)((X)*256)) #define ypax(Y,AP,XP) ((Y)+(*(AP))*(*(XP))) #endif #else #define myfract float #define myaccum float #define myfrtoac(X) (X) #define myactofr(X) (X) #define myfrtoi8(X) ((int8_t)((X)*256.0)) #define ypax fl32ypax #endif typedef struct { uint8_t n, nsb, nsp;; myfract *a, *b, *c, d; } ss_t; myfract __attribute__((noinline)) ss_eval(ss_t *m, myfract *x1, myfract *x0, myfract u) { myaccum t, y; uint8_t ia, ix, nx; uint8_t i, j, n, p, q; n = m->n; p = m->nsb; q = m->nsp; y = 0; ia = 0; ix = 0; nx = 1 + q; for (i = 0; i < n; i++) { if (i >= (n-q)) nx--; t = myfrtoac(x0[i]); t = ypax(t, &(m->b[i]), &u); /* + B*u */ for (j = 0; j < nx; j++) { /* + A*x */ t = ypax(t, &(m->a[ia++]), &x0[ix+j]); } x1[i] = myactofr(t); if (i < p) nx++; if (i >= p) ix++; y = ypax(y, &(m->c[i]), &x1[i]); } return y; } const uint8_t n1sb = 1; const uint8_t n1sp = 3; #if USE_FIXED long fract a1[] = { -0.0001000lr, +0.0017301lr, -0.0056214lr, +0.0000000lr, +0.0000000lr, -0.0035122lr, +0.0157545lr, -0.0226529lr, +0.0295285lr, -0.0023164lr, -0.0035122lr, +0.0044595lr, -0.0181410lr, +0.0000000lr, -0.0152183lr, +0.0431653lr, -0.0151720lr, -0.0152183lr, }; long fract b1[] = { -0.0034256lr, +0.0024538lr, +0.0037149lr, -0.0001916lr, +0.0003384lr, }; long fract c1[] = { -0.0133252lr, +0.0434246lr, -0.0332937lr, +0.0754853lr, -0.0336192lr, }; #else float a1[] = { -0.0001000, +0.0017301, -0.0056214, +0.0000000, +0.0000000, -0.0035122, +0.0157545, -0.0226529, +0.0295285, -0.0023164, -0.0035122, +0.0044595, -0.0181410, +0.0000000, -0.0152183, +0.0431653, -0.0151720, -0.0152183, }; float b1[] = { -0.0034256, +0.0024538, +0.0037149, -0.0001916, +0.0003384, }; float c1[] = { -0.0133252, +0.0434246, -0.0332937, +0.0754853, -0.0336192, }; #endif myfract get_meas() { static uint8_t ix = 0; #if USE_FIXED static long fract vals[] = { +0.002lr, +0.003lr, +0.004lr, +0.005lr, +0.006lr, +0.007lr, +0.008lr, +0.008lr, +0.007lr, +0.006lr, +0.005lr, +0.004lr, +0.003lr, +0.002lr, -0.002lr, -0.003lr, -0.004lr, -0.005lr, -0.006lr, -0.007lr, -0.008lr, -0.008lr, -0.007lr, -0.006lr, -0.005lr, -0.004lr, -0.003lr, -0.002lr, +0.020lr, +0.030lr, +0.040lr, +0.050lr, +0.060lr, +0.070lr, +0.080lr, +0.080lr, +0.070lr, +0.060lr, +0.050lr, +0.040lr, +0.030lr, +0.020lr, -0.020lr, -0.030lr, -0.040lr, -0.050lr, -0.060lr, -0.070lr, -0.080lr, -0.080lr, -0.070lr, -0.060lr, -0.050lr, -0.040lr, -0.030lr, -0.020lr, +0.200lr, +0.300lr, +0.400lr, +0.500lr, +0.600lr, +0.700lr, +0.800lr, +0.800lr, +0.700lr, +0.600lr, +0.500lr, +0.400lr, +0.300lr, +0.200lr, -0.200lr, -0.300lr, -0.400lr, -0.500lr, -0.600lr, -0.700lr, -0.800lr, -0.800lr, -0.700lr, -0.600lr, -0.500lr, -0.400lr, -0.300lr, -0.200lr, }; #else static float vals[] = { +0.002, +0.003, +0.004, +0.005, +0.006, +0.007, +0.008, +0.008, +0.007, +0.006, +0.005, +0.004, +0.003, +0.002, -0.002, -0.003, -0.004, -0.005, -0.006, -0.007, -0.008, -0.008, -0.007, -0.006, -0.005, -0.004, -0.003, -0.002, +0.020, +0.030, +0.040, +0.050, +0.060, +0.070, +0.080, +0.080, +0.070, +0.060, +0.050, +0.040, +0.030, +0.020, -0.020, -0.030, -0.040, -0.050, -0.060, -0.070, -0.080, -0.080, -0.070, -0.060, -0.050, -0.040, -0.030, -0.020, +0.200, +0.300, +0.400, +0.500, +0.600, +0.700, +0.800, +0.800, +0.700, +0.600, +0.500, +0.400, +0.300, +0.200, -0.200, -0.300, -0.400, -0.500, -0.600, -0.700, -0.800, -0.800, -0.700, -0.600, -0.500, -0.400, -0.300, -0.200, }; #endif if ((++ix) >= (sizeof(vals)/sizeof(vals[0]))) ix = 0; return vals[ix]; } volatile uint8_t d_num; void put_cmmd(myfract u) { int8_t nn; nn = myfrtoi8(u) + 32; if (nn > 62) nn = 62; if (nn < 1) nn = 1; } void mycontrol() { myfract x0[5], x1[5]; /* state buffers */ myfract *xp, *xn, *xt; /* prev, next, temp */ myfract u, y; ss_t m; m.n = 5; m.nsb = n1sb; m.nsp = n1sp; m.a = a1; m.b = b1; m.c = c1; m.d = 0; xp = x0; xn = x1; for (uint8_t i = 0; i < 128; i++) { u = get_meas(); y = ss_eval(&m, xn, xp, u); put_cmmd(y); xt = xn; xn = xp; xp = xt; /* swap prev,next */ } } int main() { mycontrol(); while (1); }
.section .text .global fx32ypax ;; int32_t muls334x(int32_t acc, long fract *x, long fract *y) .type fx32ypax,function fx32ypax: push r28 push r29 movw Y,r18 movw Z,r20 adiw r28,1 adiw r30,1 ;; (g:h:i:j) acc: (r25:r24:r23:r22) ;; (a:b:c) in: (r21:r20) ;; (d:e:f) in: (r19:r18) ;; _[a*d] ;; [a*e] ;; [d*b] ;; [b*e] ;; [a*f] ;; [d*c] ;; [b*f] ;; [e*f] ;; [c*f] ;; (g:h:i:j) in (r25:r24:r23:r22) ;; 0,1,2,18,19,20,21,22,23,24,25,28,29,30,31 clr r26 ; need a zero ;; g[a*d] ldd r18,Y+2 ; a ldd r19,Z+2 ; d fmuls r18,r19 ; (a*d)<<1 => r1:r0 adc r25,r26 ; accum (only -1*-1) add r23,r0 adc r24,r1 sbrc r1,7 ; sign extend com r26 ; r26:r1:r0 adc r25,r26 clr r26 ;; g:h[a*e] clr r20 ldd r18,Y+2 ; a ldd r19,Z+1 ; e fmulsu r18,r19 sbc r20,r26 add r22,r0 adc r23,r1 adc r20,r26 ;; g:h[d*b] ldd r18,Z+2 ; d ldd r19,Y+1 ; b fmulsu r18,r19 sbc r20,r26 add r22,r0 adc r23,r1 adc r24,r20 sbrc r1,7 ; sign extend com r26 ; r2:r1:r0 adc r25,r26 clr r26 ;; g:h:i[b*e] clr r20 ldd r18,Y+1 ; b ldd r19,Z+1 ; e fmul r18,r19 ; r1:r0 <= (b*e)<<1 adc r20,r26 ; prop carry add r22,r1 adc r20,r26 ;; g:h:i[a*f] ldd r18,Y+2 ; a ldd r19,Z+0 ; f fmulsu r18,r19 ; a*f sbc r20,r26 add r22,r1 adc r20,r26 ;; g:h:i[d*c] ldd r18,Z+2 ; d ldd r19,Y+0 ; c fmulsu r18,r19 sbc r20,r26 add r22,r1 adc r23,r20 adc r24,r26 sbrc r1,7 ; sign extend com r26 ; r26:r1:r0 adc r25,r26 clr r26 clr r1 pop r29 pop r28 ret