I am not sure whether this is a compiler bug, known or otherwise, but if it is I can try to distill it to the simplest case. The makefile and source are identical in both cases, and both builds use -Os.
WinAVR gcc-4.3.3 works fine with this:
/*
 * Compute the checksum for the upper-layer protocol identified by `proto`.
 *
 * The running sum is seeded with (upper-layer length + proto), then passed
 * through chksum() twice: once over 2 * sizeof(uip_ipaddr_t) bytes starting
 * at UIP_IP_BUF->srcipaddr, and once over the data in uip_buf starting at
 * offset UIP_IPH_LEN + UIP_LLH_LEN + uip_ext_len.
 *
 * Returns 0xffff when the accumulated sum is zero, otherwise uip_htons(sum).
 *
 * NOTE(review): chksum() is defined elsewhere; its arguments are presumably
 * (running sum, data pointer, byte count) -- confirm against its definition.
 */
static u16_t upper_layer_chksum(u8_t proto)
{
  u16_t upper_layer_len;
  u16_t sum;

  /* Combine the two length bytes (len[0] is the high byte, len[1] the low
     byte) into a 16-bit value and subtract uip_ext_len. */
  upper_layer_len = (((u16_t)(UIP_IP_BUF->len[0]) << 8) + UIP_IP_BUF->len[1] - uip_ext_len) ;

  /* First sum pseudoheader. */
  /* IP protocol and length fields. This addition cannot carry. */
  sum = upper_layer_len + proto;

  /* Sum IP source and destination addresses. */
  sum = chksum(sum, (u8_t *)&UIP_IP_BUF->srcipaddr, 2 * sizeof(uip_ipaddr_t));

  /* Sum TCP header and data. */
  sum = chksum(sum, &uip_buf[UIP_IPH_LEN + UIP_LLH_LEN + uip_ext_len], upper_layer_len);

  /* A zero sum is reported as 0xffff; any other value goes through uip_htons(). */
  return (sum == 0) ? 0xffff : uip_htons(sum);
}
but gcc 4.5.1 from the AVR Studio 5 toolchain (and 4.5.2, according to another report) gives random results unless upper_layer_len and sum are declared volatile. Here is how 4.3.3 compiles it, with some comments added:
static u16_t upper_layer_chksum(u8_t proto) {
    1144: 0f 93          push r16
    1146: 1f 93          push r17
  u16_t upper_layer_len;                        ;r16-17
  u16_t sum;                                    ;r24-25
  upper_layer_len = (((u16_t)(UIP_IP_BUF->len[0]) << 8) + UIP_IP_BUF->len[1] - uip_ext_len) ;
    1148: 10 91 a2 1e    lds r17, 0x1EA2        ;len[0]<<8
    114c: 00 e0          ldi r16, 0x00 ; 0
    114e: 90 91 7e 03    lds r25, 0x037E        ; r25=uip_ext_len
    1152: 09 1b          sub r16, r25
    1154: 11 09          sbc r17, r1            ; carry
    1156: 90 91 a3 1e    lds r25, 0x1EA3        ;len[1]
    115a: 09 0f          add r16, r25           ; r16-r17=upper_layer_len
    115c: 11 1d          adc r17, r1            ;OK so far
  /* First sum pseudoheader. */
  /* IP protocol and length fields. This addition cannot carry. */
  sum = upper_layer_len + proto;
  /* Sum IP source and destination addresses. */
  sum = chksum(sum, (u8_t *)&UIP_IP_BUF->srcipaddr, 2 * sizeof(uip_ipaddr_t));
    115e: 98 01          movw r18, r16          ; r18-19 = upper_layer_len
    1160: 28 0f          add r18, r24           ; + proto, r24 now free
    1162: 31 1d          adc r19, r1            ; carry
    1164: c9 01          movw r24, r18          ;r24-25 = sum
    1166: 66 ea          ldi r22, 0xA6 ; 166    ;&srcipaddr
    1168: 7e e1          ldi r23, 0x1E ; 30
    116a: 40 e2          ldi r20, 0x20 ; 32     ;2*sizeof(uip_ipaddr_t)
    116c: 50 e0          ldi r21, 0x00 ; 0
    116e: 0e 94 71 08    call 0x10e2 ; 0x10e2
  /* Sum TCP header and data. */
  sum = chksum(sum, &uip_buf[UIP_IPH_LEN + UIP_LLH_LEN + uip_ext_len], upper_layer_len);
    1172: 60 91 7e 03    lds r22, 0x037E ;
    1176: 70 e0          ldi r23, 0x00 ; 0
    1178: 6a 53          subi r22, 0x3A ; 58
    117a: 71 4e          sbci r23, 0xE1 ; 225
    117c: a8 01          movw r20, r16
    117e: 0e 94 71 08    call 0x10e2 ; 0x10e2
  return (sum == 0) ? 0xffff : uip_htons(sum);
    1182: 00 97          sbiw r24, 0x00 ; 0
    1184: 19 f4          brne .+6 ; 0x118c
    1186: 2f ef          ldi r18, 0xFF ; 255
    1188: 3f ef          ldi r19, 0xFF ; 255
    118a: 02 c0          rjmp .+4 ; 0x1190
}
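And here is the output from 4.5.1. r19 appears to be used uninitialized: only r18 is cleared before the r18:r19 pair is copied to r28:r29 with movw. It looks as though r19 should first be loaded from address 0x1EA2 (len[0]), as r17 is in the 4.3.3 listing above: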
static u16_t upper_layer_chksum(u8_t proto) {
    10c8: ef 92          push r14
    10ca: ff 92          push r15
    10cc: 0f 93          push r16
    10ce: 1f 93          push r17
    10d0: cf 93          push r28
    10d2: df 93          push r29
  u16_t upper_layer_len;
  u16_t sum;
  upper_layer_len = (((u16_t)(UIP_IP_BUF->len[0]) << 8) + UIP_IP_BUF->len[1] - uip_ext_len) ;
    10d4: 00 91 7e 03    lds r16, 0x037E
    10d8: 20 e0          ldi r18, 0x00 ; 0      ;r19 should be set to 0x1EA2?
    10da: e9 01          movw r28, r18          ;r29 now undetermined
    10dc: 10 e0          ldi r17, 0x00 ; 0
    10de: c0 1b          sub r28, r16
    10e0: d1 0b          sbc r29, r17
    10e2: 90 91 a3 1e    lds r25, 0x1EA3
    10e6: c9 0f          add r28, r25
    10e8: d1 1d          adc r29, r1
  /* First sum pseudoheader. */
  /* IP protocol and length fields. This addition cannot carry. */
  sum = upper_layer_len + proto;
  /* Sum IP source and destination addresses. */
  sum = chksum(sum, (u8_t *)&UIP_IP_BUF->srcipaddr, 2 * sizeof(uip_ipaddr_t));
    10ea: 7e 01          movw r14, r28
    10ec: e8 0e          add r14, r24
    10ee: f1 1c          adc r15, r1
    10f0: c7 01          movw r24, r14
    10f2: 66 ea          ldi r22, 0xA6 ; 166
    10f4: 7e e1          ldi r23, 0x1E ; 30
    10f6: 40 e2          ldi r20, 0x20 ; 32
    10f8: 50 e0          ldi r21, 0x00 ; 0
    10fa: 0e 94 3c 08    call 0x1078 ; 0x1078
  /* Sum TCP header and data. */
  sum = chksum(sum, &uip_buf[UIP_IPH_LEN + UIP_LLH_LEN + uip_ext_len],
    10fe: b8 01          movw r22, r16
    1100: 6a 53          subi r22, 0x3A ; 58
    1102: 71 4e          sbci r23, 0xE1 ; 225
    1104: ae 01          movw r20, r28
    1106: 0e 94 3c 08    call 0x1078 ; 0x1078
               upper_layer_len);
  return (sum == 0) ? 0xffff : uip_htons(sum);
    110a: 00 97          sbiw r24, 0x00 ; 0
    110c: 19 f0          breq .+6 ; 0x1114
}