Login [Register]
Don't have an account? Register now to chat, post, use our tools, and much more.
Since I was able to optimize my best 16-bit multiply even further today, I thought I'd share! I even think it can be further optimized. And for that matter, here is my favorite DE_Times_A, too.

mul16 596.34375cc, 92 bytes (incl. DE_Times_A)

Code:

mul16:
;Inputs:
;   BC,DE are unsigned integers
;Output:
;   HL:DE is the 32-bit product
;Destroys:
;   A,B,C
;min: 359cc
;max: 717cc
;avg: 596.34375cc
;92 bytes
;Method: BC*DE = (DE*B)*256 + DE*C. Both 8x16 partial products come from
;DE_Times_A, then the two 24-bit results are added with a one-byte offset.
    ld a,c
    call DE_Times_A     ;A:HL = DE*C (DE_Times_A leaves C=0 and B,DE untouched)
    push hl             ;save the low 16 bits of DE*C
    push af             ;save the top byte of DE*C (held in A)
    ld a,b
    call DE_Times_A+2   ;A:HL = DE*B; the +2 entry skips the 2-byte 'ld c,0', relying on C still being 0
    pop bc              ;B = top byte of DE*C (C receives the saved flags byte)
    pop de              ;DE = low 16 bits of DE*C
;AHL
; BDE
    ld c,d              ;BC = upper two bytes of DE*C
    add hl,bc
    adc a,0             ;carry from the 16-bit add goes into the top byte
;AHLE
    ld d,l              ;move the four result bytes A,H,L,E into H,L,D,E
    ld l,h
    ld h,a
;HLDE
    ret
DE_Times_A:
;Input: DE,A
;Output: A:HL is the product, C=0, B,DE unaffected, z flag set if result is zero, c flag set if A is input as 1, else nc.
;A:128~255 219+6{0,10}+{0,19}    avg=258.5   *1/2
;A:64~127  203+5{0,10}+{0,19}    avg=237.5   *1/4
;A:32~63   187+4{0,10}+{0,19}    avg=216.5   *1/8
;A:16~31   171+3{0,10}+{0,19}    avg=195.5   *1/16
;A:8~15    155+2{0,10}+{0,19}    avg=174.5   *1/32
;A:4~7     139+{0,10}+{0,19}     avg=153.5   *1/64
;A:2~3     123+{0,19}            avg=132.5   *1/128
;A:1       107cc                 avg=107     *1/256
;A:0       119cc                 avg=119     *1/256
;overall avg: 237.671875cc
;Method: unrolled shift-and-add, highest multiplier bit first. A is shifted
;left until its first set bit drops into carry; execution then enters the
;unrolled chain at the step matching the number of multiplier bits left.
;From there A does double duty: unused multiplier bits sit at the top while
;the high byte of the product builds up from the bottom.
    ld c,0              ;C=0 so that 'adc a,c' adds only the carry flag
    ld h,d
    ld l,e              ;HL = DE, the partial product once the first set bit is found
;The first shift is 'add a,a' so the caller's carry flag cannot enter bit 0
;(rla would rotate it in).
    add a,a \ jr c,mul_07
    rla \ jr c,mul_06
    rla \ jr c,mul_05
    rla \ jr c,mul_04
    rla \ jr c,mul_03
    rla \ jr c,mul_02
    rla \ jr c,mul_01
    rla
    ret c               ;only bit 0 was set: product is DE (already in HL) and A is now 0
    ld h,a              ;A was 0, and is still 0 here: return HL=0
    ld l,a
    ret
;Each step below: 'add hl,hl' doubles the partial product, 'rla' takes the
;carry from that into A's low bit and moves the next multiplier bit out of
;A's top bit. If that bit is clear, 'jr nc,$+4' hops over the two one-byte
;instructions that add DE (and the resulting carry) into A:HL.
mul_07:
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
mul_06:
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
mul_05:
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
mul_04:
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
mul_03:
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
mul_02:
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
mul_01:
;Last bit: return straight away when it is clear instead of jumping over the add.
    add hl,hl \ rla \ ret nc \ add hl,de \ adc a,c
    ret


DE_Times_A, 237.671875cc, 72 bytes

Code:

DE_Times_A:
;Input: DE,A
;Output: A:HL is the product, C=0, B,DE unaffected, z flag set if result is zero, c flag set if A is input as 1, else nc.
;A:128~255 219+6{0,10}+{0,19}    avg=258.5   *1/2
;A:64~127  203+5{0,10}+{0,19}    avg=237.5   *1/4
;A:32~63   187+4{0,10}+{0,19}    avg=216.5   *1/8
;A:16~31   171+3{0,10}+{0,19}    avg=195.5   *1/16
;A:8~15    155+2{0,10}+{0,19}    avg=174.5   *1/32
;A:4~7     139+{0,10}+{0,19}     avg=153.5   *1/64
;A:2~3     123+{0,19}            avg=132.5   *1/128
;A:1       107cc                 avg=107     *1/256
;A:0       119cc                 avg=119     *1/256
;overall avg: 237.671875cc
;Method: unrolled shift-and-add, highest multiplier bit first. A is shifted
;left until its first set bit drops into carry; execution then enters the
;unrolled chain at the step matching the number of multiplier bits left.
;From there A does double duty: unused multiplier bits sit at the top while
;the high byte of the product builds up from the bottom.
    ld c,0              ;C=0 so that 'adc a,c' adds only the carry flag
    ld h,d
    ld l,e              ;HL = DE, the partial product once the first set bit is found
;The first shift is 'add a,a' so the caller's carry flag cannot enter bit 0
;(rla would rotate it in).
    add a,a \ jr c,mul_07
    rla \ jr c,mul_06
    rla \ jr c,mul_05
    rla \ jr c,mul_04
    rla \ jr c,mul_03
    rla \ jr c,mul_02
    rla \ jr c,mul_01
    rla
    ret c               ;only bit 0 was set: product is DE (already in HL) and A is now 0
    ld h,a              ;A was 0, and is still 0 here: return HL=0
    ld l,a
    ret
;Each step below: 'add hl,hl' doubles the partial product, 'rla' takes the
;carry from that into A's low bit and moves the next multiplier bit out of
;A's top bit. If that bit is clear, 'jr nc,$+4' hops over the two one-byte
;instructions that add DE (and the resulting carry) into A:HL.
mul_07:
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
mul_06:
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
mul_05:
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
mul_04:
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
mul_03:
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
mul_02:
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
mul_01:
;Last bit: return straight away when it is clear instead of jumping over the add.
    add hl,hl \ rla \ ret nc \ add hl,de \ adc a,c
    ret


EDIT: I spotted a bug in the DE_Times_A routine! If A=1 and the c flag is set upon calling, it will erroneously return A=128. The fix was to turn the first 'rla' into 'add a,a'. Thankfully I haven't included the routine in any other work yet.
String to Byte
Takes the first 3 (non 0) chars of a string and converts to byte.

Notes:
Leading letters are treated as zeros.
Following letters are treated as null.
If the string's value is greater than 255, A overflows (the result wraps modulo 256).

Inputs:
HL: Null terminated string location in memory

Outputs:
A: Byte

Code:
strtob:
;Converts the first number found in a string to a byte.
;Inputs:
;   HL points to a null-terminated string
;Outputs:
;   A = value of the first run of up to 3 decimal digits that begins with
;       '1'..'9' (0 if the string has none). Values above 255 wrap modulo 256.
;Destroys:
;   HL (BC and DE are preserved)
;Calls:
;   cpHLDE, mulAbyB
 push bc
  push de
   ld c, 0                  ;result returned when no non-zero digit is found
strtobNumFind:
   ld a, (hl)
   or a
   jp z, strtobExit         ;reached the terminator
   sub '1'
   cp 9                     ;carry only when the character was '1'..'9'
   jr c, strtobNumWhile
   inc hl                   ;skip leading zeros and non-digit characters
   jr strtobNumFind
strtobNumWhile:
   push hl                  ;remember where the number starts
strtobNumWhileLoop:         ;step HL past at most 3 digits, counting them in C
    ld a, c
    cp 3
    jp nc, strtobConvert
    ld a, (hl)
    sub '0'
    cp 10                   ;carry only when the character was '0'..'9'
    jr nc, strtobConvert
    inc hl
    inc c
    jr strtobNumWhileLoop
strtobConvert:              ;walk back from the last digit: ones, tens, hundreds
    dec hl
    ld a, (hl)
    sub '0'
    ld c, a                 ;C = ones digit
    dec hl
   pop de                   ;DE = address of the first digit
    call cpHLDE
    jp c, strtobExit        ;HL is now in front of the number: no tens digit
    ld a, (hl)
    sub '0'
    ld b, 10
    call mulAbyB
    add a, c
    ld c, a                 ;C = tens*10 + ones
    dec hl
    call cpHLDE
    jp c, strtobExit        ;no hundreds digit
    ld a, (hl)
    sub '0'
    ld b, 100
    call mulAbyB
    add a, c
    ld c, a                 ;C = hundreds*100 + tens*10 + ones (mod 256)
strtobExit:
   ld a, c
  pop de
 pop bc
 ret

cpHLDE:
;Compares HL with DE and leaves both unchanged.
;Output: carry set if HL < DE, carry clear otherwise.
 or a           ;clear carry so that sbc is a plain subtraction
 sbc hl, de
 add hl,de      ;put HL back; this add carries exactly when the subtraction borrowed
 ret

mulAbyB:
;A = A * B (low 8 bits only), by repeated addition. B and C are preserved.
 push bc
  ld c, a       ;C = the original A
mulAbyBLoop:
  add a, c      ;adds the original A once per pass, B passes in total
  djnz mulAbyBLoop
  sub c         ;A started with one copy already in it, so remove the extra one
 pop bc
 ret


Example:

Code:
ld hl, testStr      ;HL -> null-terminated string to convert
call strtob         ;the result comes back in A
                    ;(snippet only: real code must return or jump here rather than run on into the data)

testStr: .db "0244", 0

A now contains $F4, or '244' in decimal.
Related to the previous post, here's a heavily optimized string to byte, at 14 bytes. Same inputs, but output is in C instead of A. eZ80 only. Will give garbage results if a non-digit character is encountered or if the number is greater than 255.

Code:
strtob:
; In:  HL -> null-terminated string of decimal digits
; Out: C = value; A and B are overwritten; HL ends one past the byte that ended the loop
        xor     a,a             ; running value starts at 0
strtob_loop:
        ld      c,a             ; C = running value
        ld      a,(hl)
        inc     hl
        sub     a,'0'           ; character -> digit value
        ret     c               ; byte was below '0' (the 0 terminator lands here): result is in C
        ld      b,10
        mlt     bc              ; BC = B * C = 10 * running value
        add     a,c             ; new value = digit + low byte of 10 * old value
        jr      strtob_loop


And for one byte more, here's a version that will return when encountering any non-digit character, leaving HL pointing to that character.

Code:
strtob:
; In:  HL -> string starting with decimal digits
; Out: C = value; HL -> first character that is not '0'..'9'; A and B are overwritten
        xor     a,a             ; running value starts at 0
strtob_loop:
        ld      c,a             ; C = running value
        ld      a,(hl)
        sub     a,'0'           ; character -> digit value (bytes below '0' wrap to a large value)
        ld      b,10            ; B doubles as the digit limit and the multiplier
        cp      a,b
        ret     nc              ; not 0..9: stop without stepping past this character
        inc     hl
        mlt     bc              ; BC = B * C = 10 * running value
        add     a,c             ; new value = digit + low byte of 10 * old value
        jr      strtob_loop
For a Z80 compatible alternative (15 bytes):

Code:

strtob:
;Input:
;   HL is a 0-terminated decimal string
;Output:
;   A has the 8-bit value
;Destroys:
;   D; HL is left pointing at the terminator
;Note:
;   the terminator test uses inc (hl) / dec (hl), which write to the string's memory
    xor a       ;A trick to reset our accumulator to zero
loop:
    inc (hl)    ;increment our byte...
    dec (hl)    ;then decrement. If originally zero, this will now set the z flag
    ret z
    ld d,a      ;save our current accumulator so we can multiply by 10
    add a,a     ;double our accumulator
    add a,a     ;double again (now x4)
    add a,d     ;add the original (now x5)
    add a,a     ;double again (now x10)
    add a,(hl)  ;add in the incoming byte
    sub '0'     ;adjust by subtracting '0' (could use 48, 0x30, $30, 30h)
    inc hl      ;increment the pointer
    jr loop


EDIT:
If you want to stop on a non-digit input (so not just a zero-terminated string):

Code:

strtob:
;Input:
;   HL points to a string that starts with decimal digits
;Output:
;   A has the 8-bit value; HL points one past the first non-digit character
;Destroys:
;   D, E
    xor a
    call loop   ;called so that the 'ret nc' inside the loop comes back here
    ld a,d      ;the running value is kept in D; hand it back in A
    ret
loop:
    ld d,a      ;D = running value
    ld a,(hl)
    inc hl
    sub '9'+1   ;'0'..'9' become -10..-1
    add 10      ;...so adding 10 carries only for a digit and leaves its value 0..9 in A
    ret nc      ;not a digit: done
    ld e,a      ;E = digit value
    ld a,d
    add a,a     ;double our accumulator
    add a,a     ;double again (now x4)
    add a,d     ;add the original (now x5)
    add a,a     ;double again (now x10)
    add a,e     ;add in the incoming digit
    jr loop
Mod HL
Returns the remainder of H divided by L and puts it in H.

NOTE: actually doesn't divide, but simulates modulus.

Code:

Code:

;;modHL
;;H % L => H
;;Counts up from 0 once for every unit of H, wrapping back to 0 each time
;;the count reaches L.
;;Inputs:
;; H: dividend
;; L: divisor (if L is 0 the count never wraps, so H comes back unchanged)
;;Outputs:
;; H: remainder
;;Destroys:
;; A (left equal to the result), flags
modHL:
 ld a, h
 or a
 ret z              ; H = 0: the remainder is 0. Without this, djnz would start from B = 0 and run 256 times
 push bc
 ld b, a            ; B = number of counting steps
 xor a              ; A = running remainder
modHLLoop:
 inc a
 cp l
 jr nz, modHLLoopEnd
 xor a              ; the count reached L: wrap to 0
modHLLoopEnd:
 djnz modHLLoop
 ld h, a
 pop bc
 ret
Optimizing for size, I would personally suggest A mod L -> A

Code:

;A mod L -> A. L must not be 0: subtracting 0 never borrows, so the loop would not end.
modloop:
    sub l
    jr nc,modloop   ;keep subtracting L until the subtraction borrows
    add a,l         ;add L back once: A is now the remainder

It gets the job done a lot faster, destroys no variables, and it is 4 bytes. To modify it so that it uses H instead of A, simply wrap the code with a ld a,h \ ... \ ld h,a

Code:

    ld a,h          ;H mod L -> H, worked out in A (L must not be 0, or the loop never ends)
modloop:
    sub l
    jr nc,modloop   ;keep subtracting L until the subtraction borrows
    add a,l         ;add L back once: A is now the remainder
    ld h,a

EDIT: As Runer pointed out, this will cause an infinite loop if L=0. If you can't guarantee this won't happen, then use this H mod L -> H routine:

Code:

;H mod L -> H
;returns carry reset if undefined (when L=0)
;returns carry set, with A = H = remainder, otherwise
    ld a,l
    or a            ;sets z when L is 0, and always clears carry
    ret z           ;L = 0: give up with carry reset and H untouched
    ld a,h
modloop:
    sub l
    jr nc,modloop   ;keep subtracting L until the subtraction borrows
    add a,l         ;add L back once; after a borrow this add always carries, which is the success flag
    ld h,a
    ret
I have two neat routines, pushpop and diRestore. If you want to preserve HL,DE,BC, and AF, then you can just put a call to pushpop at the start of your routine. If you want interrupts to be restored on exiting your routine, you can call diRestore at the top of your code.

They both mess with the stack to insert a return routine on the stack. For example, when your code calls diRestore, a later ret will first return to the code that restores the interrupt status, and then back to the calling routine.


Code:
pushpop:
;26 bytes, adds 229cc to the calling routine
;Call this first thing in a routine. It saves HL,DE,BC,AF on the stack and
;plants the address of pushpopret above them, so the routine's own 'ret'
;runs pushpopret (which restores all four) before reaching the routine's caller.
;All registers hold their original values when this returns.
  ex (sp),hl        ;HL = return address into the calling routine; the original HL takes its stack slot
  push de
  push bc
  push af
  push hl           ;reserve a slot (it briefly holds the return address)
  ld hl,pushpopret
  ex (sp),hl        ;slot = pushpopret; HL = return address again
  push hl           ;return address goes on top for the final 'ret'
  push af           ;protect the flags from 'add hl,sp' (A is also used as scratch below)
  ld hl,12
  add hl,sp         ;HL -> saved HL, which sits above AF, return address, pushpopret, AF, BC, DE
  ld a,(hl)
  inc hl
  ld h,(hl)
  ld l,a            ;HL = its original value
  pop af
  ret               ;back into the calling routine
pushpopret:
;Reached by the calling routine's 'ret': restore everything, then return to its caller.
  pop af
  pop bc
  pop de
  pop hl
  ret


Code:

diRestore:
;Call this first thing in a routine. It disables interrupts and plants the
;address of restoreei or restoredi on the stack beneath the return address,
;so the routine's own 'ret' first re-enables interrupts (only if they were
;enabled on entry) and then returns to the routine's caller.
;All registers hold their original values when this returns.
    ex (sp),hl      ;HL = return address into the calling routine; the original HL takes its stack slot
    push hl         ;return address back on top
    push af
    ld hl,restoreei
    ld a,r          ;'ld a,r' copies the interrupt-enable state (IFF2) into the parity/overflow flag
    jp pe,+_        ;interrupts were enabled: keep restoreei
    dec hl          ;interrupts were disabled: step back 2 bytes to restoredi
    dec hl          ;('di' and 'ret' are one byte each)
_:
    di              ;from here on, data below SP cannot be overwritten by an interrupt
    inc sp
    inc sp
    inc sp
    inc sp          ;SP -> the slot holding the original HL (past AF and the return address)
    ex (sp),hl      ;slot = restore stub address; HL = its original value
    dec sp
    dec sp
    dec sp
    dec sp          ;SP back to the saved AF
    pop af
    ret             ;back into the calling routine, interrupts off
restoredi:
    di
    ret
restoreei:
    ei
    ret

EDIT: calc84maniac pointed out that 'inc sp' can be dangerous without first disabling interrupts. I've reorganized the diRestore accordingly.
32-Bit Endian Swap (eZ80)

Swaps the byte order of a 32-bit value stored in EUHL. Can be adapted to work with other register combinations as well.


Code:
endianswap32:
; In:  E:UHL = 32-bit value (E is the top byte, then HLU, H, L)
; Out: E:UHL = the same four bytes in reverse order
; Assumes push/pop of HL moves 3 bytes (24-bit registers); the inc sp
; arithmetic below depends on that.
    push hl         ; stack, low to high: L, H, HLU
    ld h,e          ; H = old top byte
    ld e,l          ; E = old bottom byte (final)
    inc sp          ; skip the stored L so the next push overlaps the first by one byte
    push hl         ; stack, low to high: L, old E, HLU, old H, old HLU
    inc sp          ; skip the L just pushed
    pop hl          ; L = old E, H = old HLU, HLU = old H
    inc sp          ; drop the one leftover byte; SP is back where it started
    ret
  
Register to Join the Conversation
Have your own thoughts to add to this or any other topic? Want to ask a question, offer a suggestion, share your own programs and projects, upload a file to the file archives, get help with calculator and computer programming, or simply chat with like-minded coders and tech and calculator enthusiasts via the site-wide AJAX SAX widget? Registration for a free Cemetech account only takes a minute.

» Go to Registration page
Page 8 of 8
» All times are GMT - 5 Hours
 
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum

 

Advertisement