As a test I built this:
Code:
#include <avr/io.h>
/*
 * Position counters, one per axis. They are volatile, so every read in
 * Encoder_Read() is a real load from memory rather than a cached value.
 */
volatile long IncPos_X = 0;
volatile long IncPos_Y = 0;
volatile long IncPos_Z = 0;

/*
 * Return the current position counter for the requested axis.
 *
 * axis: 1 selects IncPos_X, 2 selects IncPos_Y, 3 selects IncPos_Z.
 *
 * Returns the selected counter, or 0 for any other axis value.
 *
 * noinline keeps this as a separate, callable function so that its
 * generated code can be inspected on its own.
 *
 * NOTE(review): a long is wider than a single load on an 8-bit target, so
 * if these counters are ever written from an interrupt handler the read
 * here can be torn -- confirm against the writers and guard if needed.
 */
__attribute__((noinline)) long Encoder_Read(int axis)
{
    switch (axis) {
    case 1:
        return IncPos_X;
    case 2:
        return IncPos_Y;
    case 3:
        return IncPos_Z;
    default:
        /*
         * An unknown axis must still produce a defined value: falling off
         * the end of a non-void function and then using the result is
         * undefined behaviour.
         */
        return 0;
    }
}
/* Holds the reading taken by main(). */
long POS;

/* Test driver: read axis 1 once and store the result in POS. */
int main(void)
{
    const long axis1_reading = Encoder_Read(1);

    POS = axis1_reading;
}
and got this:
Code:
00000092 <Encoder_Read>:
volatile long IncPos_Y=0;
volatile long IncPos_Z=0;
__attribute__((noinline)) long Encoder_Read(int axis)
{
switch(axis)
92: 82 30 cpi r24, 0x02 ; 2
94: 91 05 cpc r25, r1
96: 79 f0 breq .+30 ; 0xb6 <Encoder_Read+0x24>
98: 83 30 cpi r24, 0x03 ; 3
9a: 91 05 cpc r25, r1
9c: a9 f0 breq .+42 ; 0xc8 <Encoder_Read+0x36>
9e: 81 30 cpi r24, 0x01 ; 1
a0: 91 05 cpc r25, r1
a2: d9 f4 brne .+54 ; 0xda <Encoder_Read+0x48>
{
case 1:
return IncPos_X;
a4: 40 91 68 00 lds r20, 0x0068
a8: 50 91 69 00 lds r21, 0x0069
ac: 60 91 6a 00 lds r22, 0x006A
b0: 70 91 6b 00 lds r23, 0x006B
b4: 13 c0 rjmp .+38 ; 0xdc <Encoder_Read+0x4a>
break;
case 2:
return IncPos_Y;
b6: 40 91 64 00 lds r20, 0x0064
ba: 50 91 65 00 lds r21, 0x0065
be: 60 91 66 00 lds r22, 0x0066
c2: 70 91 67 00 lds r23, 0x0067
c6: 0a c0 rjmp .+20 ; 0xdc <Encoder_Read+0x4a>
break;
case 3:
return IncPos_Z;
c8: 40 91 60 00 lds r20, 0x0060
cc: 50 91 61 00 lds r21, 0x0061
d0: 60 91 62 00 lds r22, 0x0062
d4: 70 91 63 00 lds r23, 0x0063
d8: 01 c0 rjmp .+2 ; 0xdc <Encoder_Read+0x4a>
da: 03 c0 rjmp .+6 ; 0xe2 <Encoder_Read+0x50>
break;
default:
break;
}
}
dc: 34 2f mov r19, r20
de: 25 2f mov r18, r21
e0: cb 01 movw r24, r22
e2: 63 2f mov r22, r19
e4: 72 2f mov r23, r18
e6: 08 95 ret
As you can see, in each of the three `case` branches four registers are loaded, and then the routine ends with a bit of jiggery-pokery to arrange for the result to be in the ABI return registers R25..R22.
That sure looks like returning a "long"!
This was built in AS6 with its default compiler (GCC 4.6.2).
BTW, there does seem to be a missed optimisation here: why can the compiler not simply load directly into the ABI return registers?