;
; Two_Bytes_To_BCD_v2.asm
;
; Created: 2/1/2021 6:42:47 PM
; Author : cata
;
; ---- Data segment: location counter set to the start of SRAM ----------
.dseg
.org SRAM_START ; 0x0100 (m328pdef.inc)

; ---- Code segment: reset vector at word address 0 ---------------------
.cseg
.org 0x0000
    jmp reset ; executed first after reset; go to the main code

; ---- Main code is placed at INT_VECTORS_SIZE --------------------------
.org INT_VECTORS_SIZE ; 0x34 (m328pdef.inc)
reset:
;============= The Algorithm Starts Here ==========================
; Purpose : convert the 16-bit unsigned value in R17:R16 to five BCD
;           digits. The value is split as 1000*q + rem (q = 0..65,
;           rem = 0..999); each part is then broken into decimal digits
;           with MUL-by-reciprocal tricks instead of division loops.
; INPUT   : two bytes R17:R16 (0-65535), R17 = high byte
; OUTPUT  : R18 = ten-thousands digit
;           R17 = thousands digit (high nibble) : hundreds (low nibble)
;           R16 = tens digit (high nibble) : units (low nibble)
; CLOBBERS: R0, R1 (MUL result), R2, R3, R19, R20, R21, SREG flags
; NOTE    : all branches use PC-relative targets instead of labels
;           (PC = address of the branch instruction itself). Adding or
;           removing an instruction inside a branch span requires
;           re-counting the PC+n / PC-n offsets.
; NOTE    : the clock counts quoted in the comments are the original
;           author's figures; they were not re-measured during review.
; Demo input: 65535 ($FFFF)
ldi r16,low(65535)
ldi r17,high(65535) ; 2clk here
; General idea: divide R17:R16 by 1000 (remainder max. 999),
; done as a division by 4 followed by a division by 250.
; I. Divide by 4
; The two bits shifted out of R17:R16 are collected in r3 bits 7:6.
; r3 is not cleared first: its lower six bits keep leftovers of its
; previous content, but only bits 7:6 are read back later.
lsr r17
ror r16
ror r3 ; input bit 0 -> r3 bit 7
lsr r17
ror r16
ror r3 ; div. by 4 (remainder shifted to r3: bit1 in b7, bit0 in b6) ; 6clk
; II. Divide by 250 (dividend is now max. $3FFF = 65535/4)
; Below, we are making a transformation from base 256
; to binary coded base 250.
; R17 is Quotient (Q) and R16 is Remainder (R)
; - initially for the 256 division (already in place)
; - and finally for the 250 division (it is div by 1000, at end)
; How many times do we have to ADD SIX?
; value = 256*Q + R = (250+6)*Q + R = 250*Q + (6*Q + R)
; So the excess 6*Q + R must be redistributed from base 256 to base 250:
; every 250 contained in it goes to the quotient, the rest stays in R16.
; Every time an 8-bit carry (256) appears, it is worth 250 + 6:
; the quotient is incremented and 6 is added back to the remainder.
.DEF SIX=R18
ldi SIX,6
; Quotient is R17 and Remainder is R16 for B256
mul r17,SIX ; R1:R0 = 6*Q (6 times QB256)
; R1 can be only 0 or 1; Q max. $3F ($FF shifted right 2 times), $3F*6 = $017A
lsr r1 ; r1 becomes 0 and its possible 1 is shifted to C
brcc PC+4 ; R1 was zero: skip the next three instructions (to "add r16,r0")
inc R17 ; increment Quotient (R1 was one, i.e. 6*Q >= 256)
add r16,SIX ; add SIX; because we INC Quotient
brcs PC-2 ; possible one do-it-again (back to "inc R17") on Carry from the add
; On previous 6 instructions (counting from the MUL)
; R1 is 1 only when Q >= 43, i.e. High(INPUT) >= $AC:
; if High(INPUT)>=$AC 6 or 10 clk, depending on RB256
; if High(INPUT)<$AC 3 or 6 clk, depending on RB256
add r16,r0 ; add the low byte of 6*Q to the old Remainder
brcc PC+4 ; no CARRY: skip the next three instructions (to "cpi r16,250")
inc r17 ; CARRY: increment Quotient
add r16,SIX ; add SIX; because we INC Quotient
brcs PC-2 ; possible one repeat (back to "inc r17") on Carry from the add
; if there were 6 or 10 on previous paragraph we have only 3 clk here
; if there were 3 or 6 on previous paragraph we have 3 to max. 9 here
; so, this is somehow compensated (on both) to 6 to max. 15 clk
cpi r16,250 ; Remainder on B250 should be lower than 250
brlo PC+3 ; already < 250: skip the next two instructions
add r16,SIX ; r16 - 250 == r16 + 6 (mod 256)
inc r17 ; one more 250 into the Quotient; this paragraph: max. 4 clk
.UNDEF SIX ; no need for SIX in this routine any longer
; We have ended the transformation from B256 to B250
; Quotient is R17 (0..65) and Remainder is R16 (0..249) for B250
; There is a max of 4 + 19 + 2(MUL) + 1(ldi SIX) = 26 clk
; and a min. of 3 + 6 + 2(MUL) + 1(ldi SIX) = 12 clk
; depending on INPUT. And 20 to 34 clk from start.
; We are searching first only for the hundreds digit, using R16 alone
; (R16 = (INPUT mod 1000) / 4, so hundreds digit = R16 / 25).
; The least significant two bits are still parked in R3,
; but do not influence the hundreds digit.
; 164/4096 is slightly above 1/25, so the high nibble of R1 after
; the MUL equals R16/25 for R16 = 0..249.
ldi r20,164 ; found "164" with Excel, as suggested by:
mul r20,r16 ; https://www.avrfreaks.net/users/sparrow2-0
mov r21,r1 ; hundreds digit in R21, still in the high nibble
andi r21,$F0 ; only the high 4 bits are the digit; the low nibble is discarded
swap r21 ; hundreds digit now in the low nibble of R21
ldi r19,25 ; I have to SUB 25 times hundreds digit,
mul r19,r21 ; (25, not 100) because of the initial division by 4
sub r16,r0 ; R16 = 0..24 ; 30 to 44 clk from start
lsl r3 ; "POP" the two bits saved in R3 at the /4 step
rol r16 ; back into the Remainder; needed for tens and units
lsl r3
rol r16 ; R16 = INPUT mod 100 (0..99) ; 34 to 48 clk from start
; max. 99 byte conversion to BCD
; as suggested by https://www.avrfreaks.net/users/sparrow2-0
; 26/256 is slightly above 1/10; for values with bit 6 set (64..99)
; the multiplicand is reduced by one so the product never overshoots.
ldi r20,26 ; load value for mul with 1/10 (26/256)
mov r2,r16 ; make a working copy of the remainder
sbrc r16,6 ; skip the DEC when bit 6 is clear (value below 64)
dec r2 ; value is 64 or more: use one less
mul r2,r20 ; do the 1/10 mul
mov r3,r1 ; tens digit = high byte of the product
ldi r19,10 ; load value for mul with 10
mul r1,r19 ; R0 = 10 * tens digit
sub r16,r0 ; units digit is the remainder ; 45 to 59 here
; QB250 max. 65 because $FFFF/1000 = 65
; The "use one less" correction is not needed for 0..65, so these
; three instructions were left out by the author:
; mov r18,r20 ;make a copy to the remainder place
; sbrc r20,6 ;if more than 64! (it works for 65, too)
; dec r16 ; use one less - no need, also
mul r17,r20 ; do the 1/10 mul; QB250 still in R17, R20 still 26
mov r18,r1 ; ten-thousands digit = high byte of the product
; ldi r19,10 ; load value for mul with 10; already there
mul r1,r19 ; R19 is still 10: R0 = 10 * ten-thousands digit
sub r17,r0 ; thousands digit is the remainder
swap r17 ; thousands digit to the high nibble
or r17,r21 ; R17 = thousands:hundreds
swap r3 ; tens digit to the high nibble
or r16,r3 ; R16 = tens:units
; Timing (author's hand count):
; between 55 and 69 clk including the INPUT load,
; or 53 to 67 if we do not count INPUT on R17:R16 (AVG 60clk).
; If you don't use a MACRO and make a CALL it is 63 to MAX 77 clk.
; I will say it is: 63 clk as MACRO, 70 clk as CALL.
;
; Average with the input load not counted, from 0 to 65535 = 53.53 clk
; ( measured & calculated based on the AtmelStudio cycle counter )
; ( and this supports my "hand counting" - min. 53 without INPUT )
;
; Warning!
; You can use this as a MACRO, but it will be COPIED into the Code
; Segment EVERY TIME it is USED; a single use inside an ASM loop
; costs only a single copy.
; ( used 280 times as MACRO it will fill 32K Code Flash )
;
; Result in R18:R17:R16 as 06:55:35 for FF:FF input in R17:R16
;============= That is All ===================================
; End of program: idle forever in a two-instruction loop.
halt:
    nop
    rjmp halt ; back to the NOP just above