Here's a bit of extreme silliness I wrote over the weekend...
So first I wrote this AVR program:
#include__attribute__((noinline)) void mydelay(void) { volatile uint16_t i; for (i = 0; i < 23; i++); } int main(void) { DDRB = 0xFF; PORTB = 0xAA; while(1) { PORTB ^= 0xFF; mydelay(); } }
A standard "LED flasher". The reason why the delay only counts to 23 will become apparent later ;-) (I started with a delay count of 10,000 then changed my mind). I built the code (-Os for mega16) and got:
:100000000C942A000C9434000C9434000C943400AA :100010000C9434000C9434000C9434000C94340090 :100020000C9434000C9434000C9434000C94340080 :100030000C9434000C9434000C9434000C94340070 :100040000C9434000C9434000C9434000C94340060 :100050000C94340011241FBECFE5D4E0DEBFCDBF29 :100060000E944C000C9456000C940000DF93CF9338 :1000700000D0CDB7DEB71A82198205C089819A8176 :1000800001969A83898389819A814797B8F30F9063 :100090000F90CF91DF9108958FEF87BB8AEA88BBDD :1000A00088B3809588BB0E943600FACFF894FFCFC2 :00000001FF
also this:
Disassembly of section .text:

00000000 <__vectors>:
   0: 0c 94 2a 00  jmp 0x54 ; 0x54 <__ctors_end>
   4: 0c 94 34 00  jmp 0x68 ; 0x68 <__bad_interrupt>
[snippety snip snip]

00000054 <__ctors_end>:
  54: 11 24        eor r1, r1
  56: 1f be        out 0x3f, r1 ; 63
  58: cf e5        ldi r28, 0x5F ; 95
  5a: d4 e0        ldi r29, 0x04 ; 4
  5c: de bf        out 0x3e, r29 ; 62
  5e: cd bf        out 0x3d, r28 ; 61
  60: 0e 94 4c 00  call 0x98 ; 0x98 <main>
  64: 0c 94 56 00  jmp 0xac ; 0xac <_exit>

00000068 <__bad_interrupt>:
  68: 0c 94 00 00  jmp 0 ; 0x0 <__vectors>

0000006c <mydelay>:
#include <avr/io.h>

__attribute__((noinline)) void mydelay(void) {
  6c: df 93        push r29
  6e: cf 93        push r28
  70: 00 d0        rcall .+0 ; 0x72
  72: cd b7        in r28, 0x3d ; 61
  74: de b7        in r29, 0x3e ; 62
    volatile uint16_t i;
    for (i = 0; i < 23; i++);
  76: 1a 82        std Y+2, r1 ; 0x02
  78: 19 82        std Y+1, r1 ; 0x01
  7a: 05 c0        rjmp .+10 ; 0x86
  7c: 89 81        ldd r24, Y+1 ; 0x01
  7e: 9a 81        ldd r25, Y+2 ; 0x02
  80: 01 96        adiw r24, 0x01 ; 1
  82: 9a 83        std Y+2, r25 ; 0x02
  84: 89 83        std Y+1, r24 ; 0x01
  86: 89 81        ldd r24, Y+1 ; 0x01
  88: 9a 81        ldd r25, Y+2 ; 0x02
  8a: 47 97        sbiw r24, 0x17 ; 23
  8c: b8 f3        brcs .-18 ; 0x7c
}
  8e: 0f 90        pop r0
  90: 0f 90        pop r0
  92: cf 91        pop r28
  94: df 91        pop r29
  96: 08 95        ret

00000098 <main>:

int main(void) {
    DDRB = 0xFF;
  98: 8f ef        ldi r24, 0xFF ; 255
  9a: 87 bb        out 0x17, r24 ; 23
    PORTB = 0xAA;
  9c: 8a ea        ldi r24, 0xAA ; 170
  9e: 88 bb        out 0x18, r24 ; 24
    while(1) {
        PORTB ^= 0xFF;
  a0: 88 b3        in r24, 0x18 ; 24
  a2: 80 95        com r24
  a4: 88 bb        out 0x18, r24 ; 24
        mydelay();
  a6: 0e 94 36 00  call 0x6c ; 0x6c <mydelay>
  aa: fa cf        rjmp .-12 ; 0xa0
Then I converted the .hex back to .bin with:
E:\avr\default>avr-objcopy -I ihex -O binary test.hex test.bin E:\avr\default>
Then I wrote this PC program to convert .bin files back into .c source. I would just have used "xxd -i" except that I wanted an array of uint16_t:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of array elements written per output line. */
enum { WORDS_PER_LINE = 8 };

/*
 * makeavrc: turn a raw binary image into a C source file that holds the
 * image as an array of 16-bit words.
 *
 * Usage: makeavrc file.bin
 *
 * The "stem" is argv[1] cut at its first '.'.  The output file is
 * "<stem>.c" and the array is declared as "unsigned int <stem>[]", so the
 * argument should be a bare file name whose stem is a valid C identifier.
 * Each pair of input bytes is combined low byte first.  A trailing odd
 * byte cannot form a word and is dropped with a warning.
 *
 * Returns EXIT_SUCCESS when the whole image was converted and written,
 * EXIT_FAILURE on a usage error, an over-long name or any I/O failure.
 */
int main(int argc, char *argv[])
{
    char stem[256];
    char out_name[sizeof stem + 2]; /* stem plus ".c" */
    unsigned char pair[2];
    size_t words = 0;
    size_t got;
    char *dot;
    FILE *in;
    FILE *out;
    int len;
    int status = EXIT_SUCCESS;

    if (argc != 2) {
        fprintf(stderr, "usage: makeavrc file.bin\n");
        return EXIT_FAILURE;
    }

    /* Bounded copy: reject names that do not fit instead of overflowing. */
    len = snprintf(stem, sizeof stem, "%s", argv[1]);
    if (len < 0 || (size_t)len >= sizeof stem) {
        fprintf(stderr, "makeavrc: file name too long\n");
        return EXIT_FAILURE;
    }
    dot = strchr(stem, '.');
    if (dot) {
        *dot = '\0';
    }
    snprintf(out_name, sizeof out_name, "%s.c", stem);

    in = fopen(argv[1], "rb");
    if (!in) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }
    out = fopen(out_name, "w");
    if (!out) {
        perror(out_name);
        fclose(in);
        return EXIT_FAILURE;
    }

    fprintf(out, "unsigned int %s[] = {\n", stem);

    /* Read byte pairs explicitly so the result does not depend on the
       host's endianness or on the width of unsigned int. */
    while ((got = fread(pair, 1, sizeof pair, in)) == sizeof pair) {
        unsigned int word = pair[0] | ((unsigned int)pair[1] << 8);
        const char *sep;

        if (words == 0) {
            sep = "";
        } else if (words % WORDS_PER_LINE == 0) {
            sep = ",\n";
        } else {
            sep = ",";
        }
        fprintf(out, "%s 0x%04X", sep, word);
        words++;
    }
    fprintf(out, "\n};\n");

    if (ferror(in)) {
        perror(argv[1]);
        status = EXIT_FAILURE;
    } else if (got == 1) {
        fprintf(stderr, "makeavrc: odd-sized input, last byte ignored\n");
    }
    fclose(in);

    /* fclose() flushes, so a full disk may only show up here. */
    if (ferror(out)) {
        perror(out_name);
        status = EXIT_FAILURE;
        fclose(out);
    } else if (fclose(out) != 0) {
        perror(out_name);
        status = EXIT_FAILURE;
    }

    return status;
}
which I used as:
E:\avr\default>makeavrc test.bin E:\avr\default>type test.c unsigned int test[] = { 0x940C, 0x002A, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x2411, 0xBE1F, 0xE5CF, 0xE0D4, 0xBFDE, 0xBFCD, 0x940E, 0x004C, 0x940C, 0x0056, 0x940C, 0x0000, 0x93DF, 0x93CF, 0xD000, 0xB7CD, 0xB7DE, 0x821A, 0x8219, 0xC005, 0x8189, 0x819A, 0x9601, 0x839A, 0x8389, 0x8189, 0x819A, 0x9747, 0xF3B8, 0x900F, 0x900F, 0x91CF, 0x91DF, 0x9508, 0xEF8F, 0xBB87, 0xEA8A, 0xBB88, 0xB388, 0x9580, 0xBB88, 0x940E, 0x0036, 0xCFFA, 0x94F8, 0xCFFF };
Then I incorporated that in the main target AVR program (still work in progress):
#include <stdint.h>

/*
 * Flash image of the program being emulated, one 16-bit opcode word per
 * element.  regPC indexes this array directly, so it is a word address.
 */
unsigned int test[] = {
    0x940C, 0x002A, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034,
    0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034,
    0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034,
    0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034,
    0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034, 0x940C, 0x0034,
    0x940C, 0x0034, 0x2411, 0xBE1F, 0xE5CF, 0xE0D4, 0xBFDE, 0xBFCD,
    0x940E, 0x004C, 0x940C, 0x0056, 0x940C, 0x0000, 0x93DF, 0x93CF,
    0xD000, 0xB7CD, 0xB7DE, 0x821A, 0x8219, 0xC005, 0x8189, 0x819A,
    0x9601, 0x839A, 0x8389, 0x8189, 0x819A, 0x9747, 0xF3B8, 0x900F,
    0x900F, 0x91CF, 0x91DF, 0x9508, 0xEF8F, 0xBB87, 0xEA8A, 0xBB88,
    0xB388, 0x9580, 0xBB88, 0x940E, 0x0036, 0xCFFA, 0x94F8, 0xCFFF
};

volatile uint32_t regPC;    // emulated program counter (word index into test[])
volatile uint16_t opcode;   // opcode word being executed
volatile uint32_t jdest;    // absolute word target decoded from JMP / CALL
volatile uint16_t src, dst; // decoded operand fields (register number, address or immediate)
volatile int16_t offset;    // decoded signed displacement or small immediate

// The AVR memory...
// The same storage is visible either as named pieces (registers, I/O
// space, stack pointer, status register, SRAM) or as one flat data-space
// array.  With no padding between members the pieces land at 0x00 (R),
// 0x20 (SFR), 0x5D/0x5E (SPL/SPH), 0x5F (SREG) and 0x60 (ram2).
// NOTE(review): this relies on the compiler neither padding before the
// 16-bit rSP nor reordering the flag bit-fields (C is expected in bit 0);
// believed true for avr-gcc - confirm before building for another host.
volatile union {
    struct {
        uint8_t R[32];
        uint8_t SFR[61];
        union {
            struct {
                uint8_t rSPL;
                uint8_t rSPH;
            };
            uint16_t rSP;
        };
        union {
            struct {
                uint8_t C:1;
                uint8_t Z:1;
                uint8_t N:1;
                uint8_t V:1;
                uint8_t S:1;
                uint8_t H:1;
                uint8_t T:1;
                uint8_t I:1;
            };
            uint8_t all;
        } rSREG;
        uint8_t ram2[1024];
    };
    uint8_t RAM[32 + 64 + 1024];
} avr;

/* Store N, V and Z in the emulated SREG and derive S = N xor V. */
static void set_nvz(uint8_t n, uint8_t v, uint8_t z)
{
    avr.rSREG.N = n & 1;
    avr.rSREG.V = v & 1;
    avr.rSREG.S = (n ^ v) & 1;
    avr.rSREG.Z = z & 1;
}

/* Flags for an 8-bit logical result: V cleared, N and Z from the result. */
static void flags_logic(uint8_t result)
{
    set_nvz(result >> 7, 0, result == 0);
}

/*
 * Compute rd - k - carry_in, update H, C, N, V, S and Z, and return the
 * 8-bit result.  With chain_z set, Z can only stay set or be cleared,
 * never become set, as needed when a subtract-with-carry extends a
 * multi-byte comparison.
 */
static uint8_t sub_immediate(uint8_t rd, uint8_t k, uint8_t carry_in,
                             uint8_t chain_z)
{
    uint8_t r = (uint8_t)(rd - k - carry_in);
    /* Bit n of 'borrow' is set when the subtraction borrowed out of bit n. */
    uint8_t borrow = (uint8_t)((~rd & k) | (k & r) | (r & ~rd));
    uint8_t ovf = (uint8_t)((rd & ~k & ~r) | (~rd & k & r));
    uint8_t zero = (r == 0);

    if (chain_z) {
        zero = zero && avr.rSREG.Z;
    }
    avr.rSREG.H = (borrow >> 3) & 1;
    avr.rSREG.C = (borrow >> 7) & 1;
    set_nvz(r >> 7, ovf >> 7, zero);
    return r;
}

/*
 * Add k to (or, when subtract is non-zero, subtract k from) the 16-bit
 * register pair R[low_reg+1]:R[low_reg] and update C, N, V, S and Z.
 * The pair is assembled byte by byte, so no pointer cast is involved.
 */
static void word_immediate(uint8_t low_reg, uint8_t k, uint8_t subtract)
{
    uint16_t before = (uint16_t)(avr.R[low_reg] |
                                 ((uint16_t)avr.R[low_reg + 1] << 8));
    uint16_t after = (uint16_t)(subtract ? before - k : before + k);
    uint8_t was_neg = (uint8_t)(before >> 15);
    uint8_t is_neg = (uint8_t)(after >> 15);
    uint8_t ovf;

    avr.R[low_reg] = (uint8_t)(after & 0xFF);
    avr.R[low_reg + 1] = (uint8_t)(after >> 8);

    /* k is small and non-negative, so the top bit can only flip one way
       when the operation wraps. */
    if (subtract) {
        avr.rSREG.C = is_neg & !was_neg;
        ovf = was_neg & !is_neg;
    } else {
        avr.rSREG.C = was_neg & !is_neg;
        ovf = is_neg & !was_neg;
    }
    set_nvz(is_neg, ovf, after == 0);
}

/*
 * Interpreter for the opcode words in test[].
 *
 * Fills the I/O area with 0xDD and the SRAM area with 0xEE, then fetches,
 * decodes and executes one opcode per loop iteration, forever.  Every
 * branch leaves regPC one word short of where execution should continue,
 * because the common regPC++ at the bottom of the loop supplies the final
 * step.  Opcodes that match none of the patterns are skipped as if they
 * were NOPs.  Status flags are computed in software so the result does not
 * depend on which instructions the compiler picks for the host.
 *
 * NOTE(review): CALL/RCALL store the return address low byte at the lower
 * stack address and RET reads it back the same way; real hardware is
 * believed to use the opposite byte order - only matters to emulated code
 * that inspects its own return address.
 */
int main(void)
{
    regPC = 0;

    for (offset = 0; offset < 61; offset++) {
        avr.SFR[offset] = 0xDD;
    }
    for (offset = 0; offset < 1024; offset++) {
        avr.ram2[offset] = 0xEE;
    }

    while (1) {
        opcode = test[regPC];

        if (opcode == 0x0000) { // NOP
            // do nowt
        }
        else if ((opcode & 0xFE0E) == 0x940C) { // JMP
            // decode: upper target bits from this word, low 16 from the next
            jdest = ((opcode & 0x01F0) >> 3) | (opcode & 1);
            jdest <<= 16UL;
            jdest |= test[regPC + 1];
            // action
            regPC = (jdest - 1); // -1 because it's incremented below
        }
        else if ((opcode & 0xFC00) == 0x2400) { // EOR
            // decode
            src = (opcode & 0x000F) | ((opcode & 0x0200) >> 5);
            dst = (opcode & 0x01F0) >> 4;
            // action
            avr.R[dst] = avr.R[src] ^ avr.R[dst];
            flags_logic(avr.R[dst]);
        }
        else if ((opcode & 0xF800) == 0xB800) { // OUT
            // decode
            dst = (opcode & 0x0600) >> 5;
            dst |= (opcode & 0x000F);
            src = (opcode & 0x01F0) >> 4;
            // add the IO to RAM offset
            dst += 0x20;
            // action
            avr.RAM[dst] = avr.R[src];
        }
        else if ((opcode & 0xF800) == 0xB000) { // IN
            // decode
            src = (opcode & 0x0600) >> 5;
            src |= (opcode & 0x000F);
            dst = (opcode & 0x01F0) >> 4;
            // add the IO to RAM offset
            src += 0x20;
            // action
            avr.R[dst] = avr.RAM[src];
        }
        else if ((opcode & 0xF000) == 0xE000) { // LDI
            // decode
            dst = (opcode & 0x00F0) >> 4;
            dst += 16; // only upper 16 registers
            src = (opcode & 0x0F00) >> 4;
            src |= (opcode & 0x000F);
            // action
            avr.R[dst] = src;
        }
        else if ((opcode & 0xFE0E) == 0x940E) { // CALL
            // decode
            jdest = ((opcode & 0x01F0) >> 3) | (opcode & 1);
            jdest <<= 16UL;
            jdest |= test[regPC + 1];
            // action: CALL is two words long, so it returns to regPC + 2
            avr.rSP -= 2;
            avr.RAM[avr.rSP + 1] = (regPC + 2) & 0xFF;
            avr.RAM[avr.rSP + 2] = (regPC + 2) >> 8;
            regPC = jdest - 1;
        }
        else if (opcode == 0x9508) { // RET
            // action: rebuild the address from the two bytes CALL/RCALL stored
            regPC = avr.RAM[avr.rSP + 1];
            regPC |= ((uint32_t)avr.RAM[avr.rSP + 2] << 8);
            regPC--; // simply because there's a +1 below
            avr.rSP += 2;
        }
        else if ((opcode & 0xFE0F) == 0x920F) { // PUSH
            // decode
            src = (opcode & 0x01F0) >> 4;
            // action
            avr.rSP -= 1;
            avr.RAM[avr.rSP + 1] = avr.R[src];
        }
        else if ((opcode & 0xFE0F) == 0x900F) { // POP
            // decode
            dst = (opcode & 0x01F0) >> 4;
            // action
            avr.rSP += 1;
            avr.R[dst] = avr.RAM[avr.rSP];
        }
        else if ((opcode & 0xF000) == 0xD000) { // RCALL
            // decode: 12-bit two's complement word displacement
            offset = opcode & 0x0FFF;
            if (offset & 0x0800) {
                offset = 0x1000 - offset;
                offset *= -1;
            }
            // action: RCALL is one word long, so it returns to regPC + 1
            avr.rSP -= 2;
            avr.RAM[avr.rSP + 1] = (regPC + 1) & 0xFF;
            avr.RAM[avr.rSP + 2] = (regPC + 1) >> 8;
            regPC = regPC + offset; // no +1 because regPC++ below
        }
        else if ((opcode & 0xF000) == 0xC000) { // RJMP
            // decode: 12-bit two's complement word displacement
            offset = opcode & 0x0FFF;
            if (offset & 0x0800) {
                offset = 0x1000 - offset;
                offset *= -1;
            }
            // action
            regPC = regPC + offset; // no +1 because regPC++ below
        }
        else if ((opcode & 0xFE0F) == 0x9400) { // COM
            // decode
            src = (opcode & 0x01F0) >> 4;
            // action
            avr.R[src] = 255 - avr.R[src];
            avr.rSREG.C = 1; // COM always sets carry
            flags_logic(avr.R[src]);
        }
        else if ((opcode & 0xD208) == 0x8208) { // STD Y+q, Rn
            // decode: q is scattered over opcode bits 13, 11:10 and 2:0
            src = (opcode & 0x01F0) >> 4;
            dst = opcode & 7;
            dst |= (opcode & 0x0C00) >> 7;
            dst |= (opcode & 0x2000) >> 8;
            // action
            avr.RAM[(avr.R[29] << 8) + avr.R[28] + dst] = avr.R[src];
        }
        else if ((opcode & 0xD208) == 0x8008) { // LDD Rn, Y+q
            // decode: q is scattered over opcode bits 13, 11:10 and 2:0
            dst = (opcode & 0x01F0) >> 4;
            src = opcode & 7;
            src |= (opcode & 0x0C00) >> 7;
            src |= (opcode & 0x2000) >> 8;
            // action
            avr.R[dst] = avr.RAM[(avr.R[29] << 8) + avr.R[28] + src];
        }
        else if ((opcode & 0xF000) == 0x5000) { // SUBI
            // decode
            dst = (opcode & 0x00F0) >> 4;
            dst += 16; // offset to R16..R31
            src = (opcode & 0x0F00) >> 4;
            src |= (opcode & 0x000F);
            // action
            avr.R[dst] = sub_immediate(avr.R[dst], (uint8_t)src, 0, 0);
        }
        else if ((opcode & 0xF000) == 0x4000) { // SBCI
            // decode
            dst = (opcode & 0x00F0) >> 4;
            dst += 16; // offset to R16..R31
            src = (opcode & 0x0F00) >> 4;
            src |= (opcode & 0x000F);
            // action
            avr.R[dst] = sub_immediate(avr.R[dst], (uint8_t)src,
                                       avr.rSREG.C, 1);
        }
        else if ((opcode & 0xFC07) == 0xF400) { // BRCC
            // decode
            offset = (opcode & 0x03F8) >> 3;
            if (offset & 0x40) { // 2's complement if -ve
                offset = 0x80 - offset;
                offset *= -1;
            }
            // action
            if (!avr.rSREG.C) {
                regPC = regPC + offset; //+1 done by regPC++ below.
            }
        }
        else if ((opcode & 0xFC07) == 0xF000) { // BRCS
            // decode
            offset = (opcode & 0x03F8) >> 3;
            if (offset & 0x40) { // 2's complement if -ve
                offset = 0x80 - offset;
                offset *= -1;
            }
            // action
            if (avr.rSREG.C) {
                regPC = regPC + offset; //+1 done by regPC++ below.
            }
        }
        else if ((opcode & 0xFF00) == 0x9600) { // ADIW
            // decode: dst selects the pair R25:R24, R27:R26, R29:R28 or R31:R30
            dst = (opcode & 0x0030) >> 4;
            offset = (opcode & 0x00C0) >> 2;
            offset |= (opcode & 0x000F);
            // action
            word_immediate((uint8_t)(24 + 2 * dst), (uint8_t)offset, 0);
        }
        else if ((opcode & 0xFF00) == 0x9700) { // SBIW
            // decode: dst selects the pair R25:R24, R27:R26, R29:R28 or R31:R30
            dst = (opcode & 0x0030) >> 4;
            offset = (opcode & 0x00C0) >> 2;
            offset |= (opcode & 0x000F);
            // action
            word_immediate((uint8_t)(24 + 2 * dst), (uint8_t)offset, 1);
        }

        regPC++;
    }
}
That's built for a mega32 which is very like the original target mega16 but has 2K of SRAM so there's enough room for a complete copy of the mega16 memory map within its own SRAM. So far I have implemented just enough opcodes to actually run the original AVR program I wrote but the key thing is that it's executing from RAM not code flash!
Yes, indeed, it's an AVR emulator and it's about the slowest bit of AVR code you could ever hope to meet. The original code hit a breakpoint on the COM R24 instruction every 457 cycles. The simulated code executes the COM instruction every 74,198 cycles - so it's 162 times slower than a real AVR on this short test !!
Admittedly that's built with -O0: for reasons I haven't yet determined, the STD Y+q,Rd handling was not behaving right when optimised, so I had no choice.
I was prompted to have a crack at this by yet another thread the other day looking for a way to "run programs from SD card". If one emulates an AVR using an AVR you could - as I did here - use the same build tools to build the program to be emulated and given that the opcode fetch is currently just:
while (1) { opcode = test[regPC];
that could just as easily be:
while (1) { opcode = read_2bytes_fromSD_at_offset_(regPC);
One thing about doing this is that you actually get a much deeper understanding of the way the AVR core must actually be doing its job (like how to calculate negative offsets in RJMPs etc).
I think I might continue, but I can see that the only practical way to do this would probably be to use inline Asm (which I was trying to avoid). Though I suppose if you used a 20MHz 1284P it'd currently have plenty of room to emulate a 164/324/644 at about 125kHz - so you probably wouldn't want to be setting your LED delays to much more than 23!
BTW at present the emulated program does not get to touch the AVR hardware at all but clearly all of the SFRs apart from SPL/SPH and SREG could "write through" to the real SFR addresses on the host CPU and get those LEDs flashing (slowly!).
==========================================================================================