I posted some code here, using #1 and #2 from my signature, m644 and -Os:
https://www.avrfreaks.net/index.p...
/*
 * Test case: copy four bytes starting at &huge into yea[] in reverse order,
 * then read yea[] back as a single 32-bit value into `swapped`.
 *
 * - ptr initially points at the first byte of `huge`.
 * - The loop runs i = 4, 3, 2, 1, so the bytes read through ptr are stored
 *   to yea[3], yea[2], yea[1], yea[0] in that order.
 * - ptr is a file-scope variable and is advanced (ptr++) on every iteration.
 * - The final statement reads the uint8_t array through a uint32_t pointer
 *   cast, then main() spins forever in while(1).
 *
 * NOTE(review): as pasted, the closing brace of main() is missing.
 * NOTE(review): assumes unsigned long is 4 bytes on this target -- confirm.
 * NOTE(review): the uint32_t read of a uint8_t array goes through a pointer
 * cast; memcpy would be the portable form -- left unchanged so the source
 * keeps matching the listing it was compiled into.
 */
unsigned long huge = 0x12345678, swapped; uint8_t *ptr = (uint8_t * )&huge, yea[4]; int main(void) { for ( uint8_t i = 4; i > 0; i-- ){ yea[i-1] = *ptr++ ; } swapped = *( uint32_t * )yea; while(1);
*.LSS
int main(void) { d0: ad e0 ldi r26, 0x0D ; 13 d2: b1 e0 ldi r27, 0x01 ; 1 for ( uint8_t i = 4; i > 0; i-- ){ yea[i-1] = *ptr++ ; d4: e0 91 04 01 lds r30, 0x0104 d8: f0 91 05 01 lds r31, 0x0105 dc: 81 91 ld r24, Z+ de: 8c 93 st X, r24 e0: f0 93 05 01 sts 0x0105, r31 e4: e0 93 04 01 sts 0x0104, r30 e8: 11 97 sbiw r26, 0x01 ; 1 uint8_t *ptr = (uint8_t * )&huge ; int main(void) { for ( uint8_t i = 4; i > 0; i-- ){ ea: 81 e0 ldi r24, 0x01 ; 1 ec: a9 30 cpi r26, 0x09 ; 9 ee: b8 07 cpc r27, r24 f0: 89 f7 brne .-30 ; 0xd4yea[i-1] = *ptr++ ; } swapped = *( uint32_t * )yea; f2: 11 96 adiw r26, 0x01 ; 1 f4: 8d 91 ld r24, X+ f6: 9d 91 ld r25, X+ f8: 0d 90 ld r0, X+ fa: bc 91 ld r27, X fc: a0 2d mov r26, r0 fe: 80 93 06 01 sts 0x0106, r24 102: 90 93 07 01 sts 0x0107, r25 106: a0 93 08 01 sts 0x0108, r26 10a: b0 93 09 01 sts 0x0109, r27
This version just makes yea[] local to main(), and __prologue_saves__ blows up the code by 60 bytes! It doesn't do any good, though, and shouldn't even be called, right?! If I make ptr local instead, the code size doesn't change. This has to be a bug.
version2.LSS
int main(void) { d0: a4 e0 ldi r26, 0x04 ; 4 d2: b0 e0 ldi r27, 0x00 ; 0 d4: ed e6 ldi r30, 0x6D ; 109 d6: f0 e0 ldi r31, 0x00 ; 0 d8: 30 c0 rjmp .+96 ; 0x13a <__prologue_saves__+0x20> da: de 01 movw r26, r28 dc: 14 96 adiw r26, 0x04 ; 4 uint8_t yea[4]; for ( uint8_t i = 4; i > 0; i-- ){ yea[i-1] = *ptr++ ; de: e0 91 04 01 lds r30, 0x0104 e2: f0 91 05 01 lds r31, 0x0105 e6: 81 91 ld r24, Z+ e8: 8c 93 st X, r24 ea: f0 93 05 01 sts 0x0105, r31 ee: e0 93 04 01 sts 0x0104, r30 f2: 11 97 sbiw r26, 0x01 ; 1 uint8_t *ptr = (uint8_t * )&huge ; int main(void) { uint8_t yea[4]; for ( uint8_t i = 4; i > 0; i-- ){ f4: ac 17 cp r26, r28 f6: bd 07 cpc r27, r29 f8: 91 f7 brne .-28 ; 0xdeyea[i-1] = *ptr++ ; } swapped = *( uint32_t * )yea; fa: 89 81 ldd r24, Y+1 ; 0x01 fc: 9a 81 ldd r25, Y+2 ; 0x02 fe: ab 81 ldd r26, Y+3 ; 0x03 100: bc 81 ldd r27, Y+4 ; 0x04 102: 80 93 06 01 sts 0x0106, r24 106: 90 93 07 01 sts 0x0107, r25 10a: a0 93 08 01 sts 0x0108, r26 10e: b0 93 09 01 sts 0x0109, r27 asm volatile( "nop":: ); 112: 00 00 nop 114: ff cf rjmp .-2 ; 0x114 00000116 : 116: f8 94 cli 118: 1c c0 rjmp .+56 ; 0x152 <_exit> 0000011a <__prologue_saves__>: 11a: 2f 92 push r2 11c: 3f 92 push r3 11e: 4f 92 push r4 120: 5f 92 push r5 122: 6f 92 push r6 124: 7f 92 push r7 126: 8f 92 push r8 128: 9f 92 push r9 12a: af 92 push r10 12c: bf 92 push r11 12e: cf 92 push r12 130: df 92 push r13 132: ef 92 push r14 134: ff 92 push r15 136: 0f 93 push r16 138: 1f 93 push r17 13a: cf 93 push r28 13c: df 93 push r29 13e: cd b7 in r28, 0x3d ; 61 140: de b7 in r29, 0x3e ; 62 142: ca 1b sub r28, r26 144: db 0b sbc r29, r27 146: 0f b6 in r0, 0x3f ; 63 148: f8 94 cli 14a: de bf out 0x3e, r29 ; 62 14c: 0f be out 0x3f, r0 ; 63 14e: cd bf out 0x3d, r28 ; 61 150: 09 94 ijmp