I am new to assembly, but am working on a recursive factorial subassembly.

It must use the stack only to store values between iterations, and upon completion push the factorial onto the stack to POP after returning to CALL.

Below is my code, which works, except it returns to 'RJMP main' instead of returning to the 'CALL' after finishing.

Suggestions?

```
.def n = R16
.def result = R17
.def tmp =r20
.org 0x0000 ; next instruction will be written to address 0x0000
; (the location of the reset vector)
rjmp main ; set reset vector to point to the main code entry point
main: ; jump here on reset
; initialize the stack (RAMEND = 0x10FF by default for the ATmega128A)
ldi R16, HIGH(RAMEND)
out SPH, R16
ldi R16, low(RAMEND)
out SPL, R16
LDI n, 5; load a value into n
PUSH n ; push n on the stack
CALL factN ; calculate the factorial of n
POP result ; pop n! from the stack, save in result
here: RJMP here ; loop forever
factN:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Recursive Subroutine to Calculate Factorial of n
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
push n ; push n onto the stack
pop n ; pop n off the stack
TST R0 ; test if R0 is <= 0
BRNE notZero ; if RO is >0 jump to nonZero
ldi tmp,1 ; load 1 into tmp
mul tmp,tmp ; multiply tmp by itself to populate R0 with 1
push R0 ; push R0 onto the stack
notZero:
pop R0 ; pop R0
mul R0, n ; mulitply R0 by n
dec n ; decrement n
push n ; push n onto stack
push R0 ; push r0 onto stack
CPI n, 1 ; does n == 1
BRNE factN ;loop factN until n == 1
ret ; return to CALL
```

.def n = R16 .def result = R17 .def tmp = r20 .org 0x0000 ; next instruction will be written to address 0x0000 ; (the location of the reset vector) rjmp main ; set reset vector to point to the main code entry point main: ; jump here on reset ; initialize the stack (RAMEND = 0x10FF by default for the ATmega128A) ldi R16, HIGH(RAMEND) out SPH, R16 ldi R16, low(RAMEND) out SPL, R16 LDI n, 5 ; load a value into n PUSH n ; push n on the stack CALL factN ; calculate the factorial of n POP result ; pop n! from the stack, save in result here: RJMP here ; loop forever factN: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Recursive Subroutine to Caclulate the Factorial of n ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; push n ; push n onto the stack pop n ; pop n off the stack TST R0 ; test if R0 is <= 0 BRNE notZero; if RO is >0 jump to nonZero ldi tmp,1 ; load 1 into tmp mul tmp,tmp ; multiply tmp by itself to populate R0 with 1 push R0 ; push R0 onto the stack notZero: pop R0 ; pop R0 mul R0, n ; mulitply R0 by n dec n ; decrement n push n ; push n onto stack push R0 ; push r0 onto stack CPI n, 1 ; does n == 1 BRNE factN ; loop factN until n == 1 ret ; return to CALL

```
.def n = R16
.def result = R17
.def tmp =r20
.org 0x0000 ; next instruction will be written to address 0x0000
; (the location of the reset vector)
rjmp main ; set reset vector to point to the main code entry point
main: ; jump here on reset
; initialize the stack (RAMEND = 0x10FF by default for the ATmega128A)
ldi R16, HIGH(RAMEND)
out SPH, R16
ldi R16, low(RAMEND)
out SPL, R16
LDI n, 5; load a value into n
PUSH n ; push n on the stack
CALL factN ; calculate the factorial of n
POP result ; pop n! from the stack, save in result
here: RJMP here ; loop forever
factN:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Recursive Subroutine to Calculate Factorial of n
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
push n ; push n onto the stack
pop n ; pop n off the stack
TST R0 ; test if R0 is <= 0
BRNE notZero ; if RO is >0 jump to nonZero
ldi tmp,1 ; load 1 into tmp
mul tmp,tmp ; multiply tmp by itself to populate R0 with 1
push R0 ; push R0 onto the stack
notZero:
pop R0 ; pop R0
mul R0, n ; mulitply R0 by n
dec n ; decrement n
push n ; push n onto stack
push R0 ; push r0 onto stack
CPI n, 1 ; does n == 1
BRNE factN ;loop factN until n == 1
ret ; return to CALL
```