Login [Register]
Don't have an account? Register now to chat, post, use our tools, and much more.
String to Byte
Takes the first 3 (non 0) chars of a string and converts to byte.

Notes:
Leading letters are treated as zeros.
Following letters are treated as null.
If string has a value > 255, a is overflowed.

Inputs:
HL: Null terminated string location in memory

Outputs:
A: Byte

Code:
strtob:
 push bc
  push de
   ld c, 0
strtobNumFind:
   ld a, (hl)
   cp $00
   jp z, strtobEXIT
   ld c, 0
   sub '1'
   cp '9' + 1
   jr c, strtobNumWhile
   inc hl
   jr strtobNumFind
strtobNumWhile:
   push hl
strtobNumWhileLoop:
    ld a, c
    cp 3
    jp nc, strtobConvert
    ld a, (hl)
    sub '0' - 1
    jr c, strtobConvert
    cp '9' + 1
    jr nc, strtobConvert
    inc hl
    inc c
    jr strtobNumWhileLoop
strtobConvert:
    dec hl
    ld a, (hl)
    sub '0'
    ld c, a
    dec hl
   pop de
    call cpHLDE
    jp c, strtobExit 
    ld a, (hl)
    sub '0'
    ld b, 10
    call mulAbyB
    add a, c
    ld c, a
    dec hl
    call cpHLDE
    jp c, strtobExit
    ld a, (hl)
    sub '0'
    ld b, 100
    call mulAbyB
    add a, c
    ld c, a
strtobExit:
   ld a, c
  pop de
 pop bc
 ret

cpHLDE:
 or a
 sbc hl, de
 add hl,de
 ret

mulAbyB:
 push bc
  ld c, a
mulAbyBLoop:
  add a, c
  djnz mulAbyBLoop
  sub c
 pop bc
 ret


Example:

Code:
ld hl, testStr
call strtob

testStr: .db "0244", 0

A now contains $F4, or '244' in decimal.
Related to the previous post, here's a heavily optimized string to byte, at 14 bytes. Same inputs, but output is in C instead of A. eZ80 only. Will give garbage results if a non-digit character is encountered or if the number is greater than 255.

Code:
strtob:
        xor     a,a
strtob_loop:
        ld      c,a
        ld      a,(hl)
        inc     hl
        sub     a,'0'
        ret     c
        ld      b,10
        mlt     bc
        add     a,c
        jr      strtob_loop


And for one byte more, here's a version that will return when encountering any non-digit character, leaving HL pointing to that character.

Code:
strtob:
        xor     a,a
strtob_loop:
        ld      c,a
        ld      a,(hl)
        sub     a,'0'
        ld      b,10
        cp      a,b
        ret     nc
        inc     hl
        mlt     bc
        add     a,c
        jr      strtob_loop
For a Z80 compatible alternative (15 bytes):

Code:

strtob:
;Input:
;   HL is a 0-terminated decimal string
;Output:
;   A has the 8-bit value
    xor a       ;A trick to reset our accumulator to zero
loop:
    inc (hl)    ;increment our byte...
    dec (hl)    ;then decrement. If originally zero, this will now set the z flag
    ret z
    ld d,a      ;save our current acumulator so we can multiply by 10
    add a,a     ;double our accumulator
    add a,a     ;double again (now x4)
    add a,d     ;add the original (now x5)
    add a,a     ;double again (now x10)
    add a,(hl)  ;add in the incoming byte
    sub '0'     ;adjust by subtracting '0' (could use 48, 0x30, $30, 30h)
    inc hl      ;increment the pointer
    jr loop


EDIT:
If you want to stop on a non-digit input (so not just a zero-terminated string):

Code:

strtob:
    xor a
    call loop
    ld a,d
    ret
loop:
    ld d,a
    ld a,(hl)
    inc hl
    sub '9'+1
    add 10
    ret nc
    ld e,a
    ld a,d
    add a,a     ;double our accumulator
    add a,a     ;double again (now x4)
    add a,d     ;add the original (now x5)
    add a,a     ;double again (now x10)
    add a,e     ;add in the incoming digit
    jr loop
Mod HL
returns the remainder after division of H and L and puts it in H

NOTE: actually doesn't divide, but simulates modulus.

Code:

Code:

;;modHL
;;H % L => H
;;Inputs:
;; H:
;; L:
modHL:
 push bc
 ld b, h
 ld a, 0
modHLLoop:
 inc a
 cp a, l
 jr nz, modHLLoopEnd
 ld a, 0
modHLLoopEnd:
 djnz modHLLoop
 ld h, a
 pop bc
 ret
Optimizing for size, I would personally suggest A mod L -> A

Code:

modloop:
    sub l
    jr nc,modloop
    add a,l

It get the job done a lot faster, destroys no variables, and it is 4 bytes. To modify it so that it uses H instead of A, simply wrap the code with a ld a,h \ ... \ ld h,a

Code:

    ld a,h
modloop:
    sub l
    jr nc,modloop
    add a,l
    ld h,a

EDIT: As Runer pointed out, this will cause an infinite loop if L=0. If you can't guarantee this won't happen, then use this H mod L -> H routine:

Code:

;H mod L -> H
;returns carry reset if undefined (when L=0)
    ld a,l
    or a
    ret z
    ld a,h
modloop:
    sub l
    jr nc,modloop
    add a,l
    ld h,a
    ret
I have two neat routines, pushpop and diRestore. If you want to preserve HL,DE,BC, and AF, then you can just put a call to pushpop at the start of your routine. If you want interrupts to be restored on exiting your routine, you can call diRestore at the top of your code.

They both mess with the stack to insert a return routine on the stack. For example, when your code calls diRestore, then an ret will first return to the code to restore the interrupt status, and then back to the calling routine.


Code:
pushpop:
;26 bytes, adds 229cc to the calling routine
  ex (sp),hl
  push de
  push bc
  push af
  push hl
  ld hl,pushpopret
  ex (sp),hl
  push hl
  push af
  ld hl,12
  add hl,sp
  ld a,(hl)
  inc hl
  ld h,(hl)
  ld l,a
  pop af
  ret
pushpopret:
  pop af
  pop bc
  pop de
  pop hl
  ret


Code:

diRestore:
    ex (sp),hl
    push hl
    push af
    ld hl,restoreei
    ld a,r
    jp pe,+_
    dec hl
    dec hl
_:
    di
    inc sp
    inc sp
    inc sp
    inc sp
    ex (sp),hl
    dec sp
    dec sp
    dec sp
    dec sp
    pop af
    ret
restoredi:
    di
    ret
restoreei:
    ei
    ret

EDIT: calc84maniac pointed out that 'inc sp' can be dangerous without first disabling interrupts. I've reorganized the diRestore accordingly.
32-Bit Endian Swap (eZ80)

Swaps the byte order of a 32-bit value stored in EUHL. Can be adapted to work with other register combinations as well.


Code:
endianswap32:
    push hl
    ld h,e
    ld e,l
    inc sp
    push hl
    inc sp
    pop hl
    inc sp
    ret
The ClearHorizLine routine here never got updated, so I took a stab at it, writing a routine for black, white, and inverted horizontal lines. In mine, they all use a common subroutine, but if you take just the white+subroutine, it is 32 bytes smaller (25%), and all combined take 25 bytes over the original.
(NOTE: I edited the original routine to use `rla` instead of `rl a` to get these results.)

Code:

;Contains 3 routines and 1 subroutine.
;horizontalline_black
;    Draws a black horizontal line
;horizontalline_white
;    Draws a white horizontal line
;horizontalline_invert
;    Draws an inverted horizontal line
;horizontalline_sub
;    A routine common to the others that calculates masks and pointers.

horizontalline_black:
;Input:
;   A is y0
;   D is the width
;   C is x0
;Output:
;   Black horizontal line from (A,C) to (A,C+D-1)
;NOTE: This does not do any clipping. You can frick stuff up.
  call horizontalline_sub
  jr z,horizontalline_black_small
  ld d,b
  ld b,a
  ld a,(hl)
  or c
  ld (hl),a
  inc hl
  dec b
  jr z,horizontalline_black_last
_:
  ld (hl),$FF
  inc hl
  djnz -_
horizontalline_black_last:
  ld a,(hl)
  or d
  ld (hl),a
  ret
horizontalline_black_small:
  ld a,c
  and b
  or (hl)
  ld (hl),a
  ret


horizontalline_white:
;Input:
;   A is y0
;   D is the width
;   C is x0
;Output:
;   white horizontal line from (A,C) to (A,C+D-1)
;NOTE: This does not do any clipping. You can frick stuff up.
  call horizontalline_sub
  jr z,horizontalline_white_small
  ld d,b
  ld b,a
  ld a,c
  cpl
  and (hl)
  ld (hl),a
  inc hl
  dec b
  jr z,horizontalline_white_last
_:
  ld (hl),0
  inc hl
  djnz -_
horizontalline_white_last:
  ld a,d
  cpl
  and (hl)
  ld (hl),a
  ret
horizontalline_white_small:
  ld a,c
  and b
  cpl
  and (hl)
  ld (hl),a
  ret


horizontalline_invert:
;Input:
;   A is y0
;   D is the width
;   C is x0
;Output:
;   invert horizontal line from (A,C) to (A,C+D-1)
;NOTE: This does not do any clipping. You can frick stuff up.
  call horizontalline_sub
  jr z,horizontalline_invert_small
  ld d,b
  ld b,a
  ld a,(hl)
  xor c
  ld (hl),a
  inc hl
  dec b
  jr z,horizontalline_invert_last
_:
  ld a,(hl)
  cpl
  ld (hl),a
  inc hl
  djnz -_
horizontalline_invert_last:
  ld a,(hl)
  xor d
  ld (hl),a
  ret
horizontalline_invert_small:
  ld a,c
  and b
  xor (hl)
  ld (hl),a
  ret


horizontalline_sub:
;Input:
;   A is y0
;   D is the width
;   C is x0
;Output:
;   A is the width in bytes of the line
;   C is the left mask
;   B is the right mask
;   HL points to where to begin drawing.
;   z flag is set if the line starts and ends on the same byte.

;First compute the address of the byte to start drawing at.
  ld h,0
  ld b,h
  ld l,a
  add a,a
  add a,l
  ld l,a
  add hl,hl
  add hl,hl
  ld a,c
  ld e,c    ;for safe keeping
  srl c
  srl c
  srl c
  add hl,bc
  ld bc,plotSScreen
  add hl,bc

;Now compute the left mask
  and 7
  ld b,a
  ld a,$FF
  jr z,+_
  rra
  djnz $-1
_:
;Increment A and invert
  inc a
  cpl
  ld c,a    ;C is the left mask

;now compute the right mask at E+D-1
  ld a,d
  add a,e
  dec a
  ld d,a
  and 7
  ld b,a
  ld a,$7F
  jr z,+_
  rrca
  djnz $-1
_:
  inc a
  ld b,a    ;B is the right mask

;Now compute the width in bytes of the line
;(D>>3)-(E>>3)
  ld a,d
  and %11111000
  rrca
  rrca
  rrca
  srl e
  srl e
  srl e
  sub e
  ret

I am going to continue editing this post if I come across other optimizations.

EDIT: Here is an optimized version of Ashbad's itoa_8 routine. It's much faster and less than half the size:


Code:

;Converts an 8-bit signed integer to a string

itoa_8:
;Input:
;   A is a signed integer
;   HL points to where the null-terminated ASCII string is stored (needs at most 5 bytes)
;Output:
;   The number is converted to a null-terminated string at HL
;Destroys:
;   Up to five bytes at HL
;   All registers preserved.
;on 0 to 9:       252       D=0
;on 10 to 99:     258+20D   D=0 to 9
;on 100 to 127:   277+20D   D=0 to 2
;on -1 to -9:     276       D=0
;on -10 to -99:   282+20D   D=0 to 9
;on -100 to -128: 301+20D   D=0 to 2

;min: 252cc  (+23cc over original)
;max: 462cc  (-49cc over original)
;avg: 343.74609375cc = 87999/256
;54 bytes
  push hl
  push de
  push bc
  push af
  or a
  jp p,itoa_pos
  neg
  ld (hl),$1A  ;start if neg char on TI-OS
  inc hl
itoa_pos:
;A is on [0,128]
;calculate 100s place, plus 1 for a future calculation
  ld b,'0'
  cp 100 \ jr c,$+5 \ sub 100 \ inc b

;calculate 10s place digit, +1 for future calculation
  ld de,$0A2F
  inc e \ sub d \ jr nc,$-2
  ld c,a

;Digits are now in D, C, A
; strip leading zeros!
  ld a,'0'
  cp b \ jr z,$+5 \ ld (hl),b \ inc hl \ .db $FE  ; start of `cp *` to skip the next byte, turns into `cp $BB` which will always return nz and nc
  cp e \ jr z,$+4 \ ld (hl),e \ inc hl
  add a,c
  add a,d
  ld (hl),a
  inc hl
  ld (hl),0

  pop af
  pop bc
  pop de
  pop hl
  ret


EDIT: Optimized the above itoa_8 routine more. Here are some more number-to-string routines:
uitoa_8
Converts an 8-bit unsigned integer to an ASCII string.

Code:

;Converts an 8-bit unsigned integer to a string

uitoa_8:
;Input:
;   A is a signed integer
;   HL points to where the null-terminated ASCII string is stored (needs at most 5 bytes)
;Output:
;   The number is converted to a null-terminated string at HL
;Destroys:
;   Up to four bytes at HL
;   All registers preserved.
;on 0 to 9:     238              D=0
;on 10 to 99:   244+20D          D=0 to 9
;on 100 to 255: 257+2{0,6}+20D   D=0 to 5
;min: 238cc
;max: 424cc
;avg: 317.453125cc = 81268/256 = (238*10 + 334*90+313*156)/256
;52 bytes

  push hl
  push de
  push bc
  push af
;A is on [0,255]
;calculate 100s place, plus 1 for a future calculation
  ld b,'0'
  cp 100 \ jr c,$+5 \ sub 100 \ inc b
  cp 100 \ jr c,$+5 \ sub 100 \ inc b

;calculate 10s place digit, +1 for future calculation
  ld de,$0A2F
  inc e \ sub d \ jr nc,$-2
  ld c,a

;Digits are now in D, C, A
; strip leading zeros!
  ld a,'0'
  cp b \ jr z,$+5 \ ld (hl),b \ inc hl \ .db $FE  ; start of `cp *` to skip the next byte, turns into `cp $BB` which will always return nz and nc
  cp e \ jr z,$+4 \ ld (hl),e \ inc hl
  add a,c
  add a,d
  ld (hl),a
  inc hl
  ld (hl),0

  pop af
  pop bc
  pop de
  pop hl
  ret


itoa_16
Converts a 16-bit signed integer to an ASCII string.

Code:

;Converts a 16-bit signed integer to an ASCII string.

itoa_16:
;Input:
;   DE is the number to convert
;   HL points to where to write the ASCII string (up to 7 bytes needed).
;Output:
;   HL points to the null-terminated ASCII string
;      NOTE: This isn't necessarily the same as the input HL.
  push de
  push bc
  push af
  push hl
  bit 7,d
  jr z,+_
  xor a
  sub e
  ld e,a
  sbc a,a
  sub d
  ld d,a
  ld (hl),$1A     ;negative char on TI-OS
  inc hl
_:
  ex de,hl

  ld bc,-10000
  ld a,'0'-1
  inc a \ add hl,bc \ jr c,$-2
  ld (de),a
  inc de

  ld bc,1000
  ld a,'9'+1
  dec a \ add hl,bc \ jr nc,$-2
  ld (de),a
  inc de

  ld bc,-100
  ld a,'0'-1
  inc a \ add hl,bc \ jr c,$-2
  ld (de),a
  inc de

  ld a,l
  ld h,'9'+1
  dec h \ add a,10 \ jr nc,$-3
  add a,'0'
  ex de,hl
  ld (hl),d
  inc hl
  ld (hl),a
  inc hl
  ld (hl),0

;No strip the leading zeros
  pop hl

;If the first char is a negative sign, skip it
  ld a,(hl)
  cp $1A
  push af
  ld a,'0'
  jr nz,$+3
  inc hl
  cp (hl)
  jr z,$-2

;Check if we need to re-write the negative sign
  pop af
  jr nz,+_
  dec hl
  ld (hl),a
_:

  pop af
  pop bc
  pop de
  ret


uitoa_16
Converts a 16-bit unsigned integer to an ASCII string.

Code:

;Converts a 16-bit unsigned integer to an ASCII string.

uitoa_16:
;Input:
;   DE is the number to convert
;   HL points to where to write the ASCII string (up to 6 bytes needed).
;Output:
;   HL points to the null-terminated ASCII string
;      NOTE: This isn't necessarily the same as the input HL.
  push de
  push bc
  push af
  ex de,hl

  ld bc,-10000
  ld a,'0'-1
  inc a \ add hl,bc \ jr c,$-2
  ld (de),a
  inc de

  ld bc,1000
  ld a,'9'+1
  dec a \ add hl,bc \ jr nc,$-2
  ld (de),a
  inc de

  ld bc,-100
  ld a,'0'-1
  inc a \ add hl,bc \ jr c,$-2
  ld (de),a
  inc de

  ld a,l
  ld h,'9'+1
  dec h \ add a,10 \ jr nc,$-3
  add a,'0'
  ex de,hl
  ld (hl),d
  inc hl
  ld (hl),a
  inc hl
  ld (hl),0

;No strip the leading zeros
  ld c,-6
  add hl,bc
  ld a,'0'
  inc hl \ cp (hl) \ jr z,$-2
  pop af
  pop bc
  pop de
  ret
Here are some routines that I've added to the repository:

fixed88_to_string
Uses the itoa_8 routine in the previous post to convert an 8.8 fixed-point number to a string.

Code:

;This converts a fixed-point number to a string.
;It displays up to 3 digits after the decimal.

fixed88_to_str:
;Inputs:
;   D.E is the fixed-point number
;   HL points to where the string gets output.
;      Needs at most 9 bytes.
;Outputs:
;   HL is preserved
;Destroys:
;   AF,DE,BC

;First check if the input is negative.
;If so, write a negative sign and negate
  push hl
  ld a,d
  or a
  jp p,+_
  ld (hl),$1A      ;negative sign on TI-OS
  inc hl
  xor a
  sub e
  ld e,a
  sbc a,a
  sub d
_:

;Our adjusted number is in A.E
;Now we can print the integer part
  call itoa_8

;Check if we need to print the fractional part
  xor a
  cp e
  jr z,fixed88_to_str_end

;We need to write the fractional part, so seek the end of the string
;Search for the null byte. A is already 0
  cpir

;Write a decimal
  dec hl
  ld (hl),'.'

  ld b,3
_:
;Multiply E by 10, converting overflow to an ASCII digit
  call fixed88_to_str_e_times_10
  inc hl
  ld (hl),a
  djnz -_

;Strip the ending zeros
  ld a,'0'
_:
  cp (hl)
  dec hl
  jr z,-_

;write a null byte
  inc hl
  inc hl
  ld (hl),0

fixed88_to_str_end:
;restore HL
  pop hl
  ret

fixed88_to_str_e_times_10:
  ld a,e
  ld d,0
  add a,a \ rl d
  add a,a \ rl d
  add a,e \ jr nc,$+3 \ inc d
  add a,a
  ld e,a
  ld a,d
  rla
  add a,'0'
  ret


sqrtA
This is a very fast, unrolled routine to compute the square root of A.


Code:

sqrtA:
;Input: A
;Output: D is the square root, A is the remainder (input-D^2)
;Destroys: BC
;speed: 161+{0,6}+{0,1}+{0,1}+{0,3}
;min: 161cc
;max: 172cc
;avg: 166.5cc
;45 bytes
  ld d,$40

  sub d
  jr nc,+_
  add a,d
  ld d,0
_:

  set 4,d
  sub d
  jr nc,+_
  add a,d
  .db $01   ;start of ld bc,** which is 10cc to skip the next two bytes.
_:
  set 5,d
  res 4,d
  srl d

  set 2,d
  sub d
  jr nc,+_
  add a,d
  .db $01   ;start of ld bc,** which is 10cc to skip the next two bytes.
_:
  set 3,d
  res 2,d
  srl d

  inc d
  sub d
  jr nc,+_
  add a,d
  dec d
_:
  inc d
  srl d
  ret


sqrtfixed_88
An unrolled, fast 8.8 fixed-point square root routine. Uses the above sqrtA routine.

Code:

sqrtfixed_88:
;Input: A.E ==> D.E
;Output: DE is the sqrt, AHL is the remainder
;Speed: 690+6{0,13}+{0,3+{0,18}}+{0,38}+sqrtA
;min: 855cc
;max: 1003cc
;avg: 924.5cc
;152 bytes

  call sqrtA
  ld l,a
  ld a,e
  ld h,0
  ld e,d
  ld d,h

  sla e
  rl d

  sll e \ rl d
  add a,a \ adc hl,hl
  add a,a \ adc hl,hl
  sbc hl,de
  jr nc,+_
  add hl,de
  dec e
  .db $FE     ;start of `cp *`
_:
  inc e

  sll e \ rl d
  add a,a \ adc hl,hl
  add a,a \ adc hl,hl
  sbc hl,de
  jr nc,+_
  add hl,de
  dec e
  .db $FE     ;start of `cp *`
_:
  inc e

  sll e \ rl d
  add a,a \ adc hl,hl
  add a,a \ adc hl,hl
  sbc hl,de
  jr nc,+_
  add hl,de
  dec e
  .db $FE     ;start of `cp *`
_:
  inc e

  sll e \ rl d
  add a,a \ adc hl,hl
  add a,a \ adc hl,hl
  sbc hl,de
  jr nc,+_
  add hl,de
  dec e
  .db $FE     ;start of `cp *`
_:
  inc e

;Now we have four more iterations
;The first two are no problem
  sll e \ rl d
  add hl,hl
  add hl,hl
  sbc hl,de
  jr nc,+_
  add hl,de
  dec e
  .db $FE     ;start of `cp *`
_:
  inc e

  sll e \ rl d
  add hl,hl
  add hl,hl
  sbc hl,de
  jr nc,+_
  add hl,de
  dec e
  .db $FE     ;start of `cp *`
_:
  inc e

sqrtfixed_88_iter11:
;On the next iteration, HL might temporarily overflow by 1 bit
  sll e \ rl d      ;sla e \ rl d \ inc e
  add hl,hl
  add hl,hl
  jr c,sqrtfixed_88_iter11_br0
;
  sbc hl,de
  jr nc,+_
  add hl,de
  dec e
  jr sqrtfixed_88_iter12
sqrtfixed_88_iter11_br0:
  or a
  sbc hl,de
_:
  inc e

;On the next iteration, HL is allowed to overflow, DE could overflow with our current routine, but it needs to be shifted right at the end, anyways
sqrtfixed_88_iter12:
  ld b,a      ;A is 0, so B is 0
  add hl,hl
  add hl,hl
  rla
;AHL - (DE+DE+1)
  sbc hl,de \ sbc a,b
  inc e
  or a
  sbc hl,de \ sbc a,b
  ret p
  add hl,de
  adc a,b
  dec e
  add hl,de
  adc a,b
  ret


ncr_HL_DE
Computes 'HL choose DE' in such a way so that overflow only occurs if the final result overflows 16 bits.

Code:

; Requires
;    mul16          ;BC*DE ==> DEHL
;    DEHL_Div_BC    ;DEHL/BC ==> DEHL

ncr_HL_DE:
;"n choose r", defined as n!/(r!(n-r)!)
;Computes "HL choose DE"
;Inputs: HL,DE
;Outputs:
;   HL is the result
;       "HL choose DE"
;   carry flag reset means overflow
;Destroys:
;   A,BC,DE,IX
;Notes:
;   Overflow is returned as 0
;   Overflow happens if HL choose DE exceeds 65535
;   This algorithm is constructed in such a way that intermediate
;   operations won't erroneously trigger overflow.
;66 bytes
  ld bc,1
  or a
  sbc hl,de
  jr c,ncr_oob
  jr z,ncr_exit
  sbc hl,de
  add hl,de
  jr c,$+3
  ex de,hl
  ld a,h
  or l
  push hl
  pop ix
ncr_exit:
  ld h,b
  ld l,c
  scf
  ret z
ncr_loop:
  push bc \ push de
  push hl \ push bc
  ld b,h
  ld c,l
  call mul16          ;BC*DE ==> DEHL
  pop bc
  call DEHL_Div_BC    ;result in DEHL
  ld a,d
  or e
  pop bc
  pop de
  jr nz,ncr_overflow
  add hl,bc
  jr c,ncr_overflow
  pop bc
  inc bc
  ld a,b
  cp ixh
  jr c,ncr_loop
  ld a,ixl
  cp c
  jr nc,ncr_loop
  ret
ncr_overflow:
  pop bc
  xor a
  ld b,a
ncr_oob:
  ld h,b
  ld l,b
  ret
Masked Sprite
Earlier in the thread, Kerm offered a masked sprite routine. Fallen Ghost made some improvements, making it a smaller and faster and taking care of a minor bug, but at the cost of using the shadow registers (which needs interrupts off in many cases).
I optimized Fallen Ghost's routine, saving another 4 bytes and making it a little faster:

Code:

;Masked Sprite routine
;This is optimized by Zeda from a routine by Fallen Ghost, as a response to Kerm's routine.
;NOTE: Uses shadow registers!

PutMask:
;Inputs:
;   a = X coordinate
;   l = Y coordinate
;   b= height of sprite
;   ix = sprite address
;   ix+b = mask address (same length as sprite, please)
;
;Outputs:
;   Mask and then sprite displayed at specified location on the graph buffer
;
;Destroys:
;   AF, BC, DE, HL, IX, DE', HL' (shadow registers)
;
;Improvements from Fallen Ghost's version:
;   - 4 bytes less
;   - Faster

  ld e,l
  ld h,0
  ld d,h
  add hl,hl
  add hl,de
  add hl,hl
  add hl,hl
  ld e,a
  and 7
  ld c,a
  xor e  ;essentially gets E with the bottom 3 bits reset
#if (plotSScreen&255) = 64
  inc a
  rra
  rra
  rra
  ld e,a
  ld d,plotSScreen>>8
#else
  rra
  rra
  rra
  ld e,a
  add hl,de
  ld de,plotSScreen
#endif
  add hl,de

   ld a,b
   exx
   add a,ixl
   ld l,a
   ld a,ixh
   adc a,0
   ld h,a
   exx
putMaskLoop1:
  exx
  ld      d,0FFh
  ld      e,(hl)
  inc hl
  exx
  xor a
   ld   d,(ix)
   ld   e,a
   sub c
   jr   z,putMaskSkip1
putMaskLoop2:
  exx
  rr e
  rr d
  exx
   srl   d
   rr   e
   sub -1
   jr   nz,putMaskLoop2
putMaskSkip1:
   ld   a,(hl)
   exx
   and e
   exx
   or   d
   ld   (hl),a
   inc   hl
   ld   a,(hl)
   exx
   and     d
   exx
   or   e
   ld   (hl),a
   ld   de,00Bh
   add   hl,de
   inc   ix
   djnz   putMaskLoop1
   ret

However, the reason they had to resort to the shadow registers was due to the sprite format. Instead of keeping the mask and the sprite data in two different chunks of memory, you can interleave them and avoid having to calculate another pointer. Doing it this way, I was able to save 17 more bytes and make it even faster:

Code:

;Masked Sprite routine
putsprite_masked:
;Inputs:
;   (A,L) = (x,y)
;   B is height
;   IX points to the sprite data
;       first byte is the data
;       second byte is mask
;       continues, alternating like this.
;
;Outputs:
;   Mask is ANDed to the buffer, then data is ORed on top of that.
;
;Destroys:
;   AF, BC, DE, HL, IX
;
;Notes:
;   To set a pixel...
;     black: mask is any, data is 1
;     white: mask is 0, data is 0
;     clear: mask is 1, data is 0 (keeps the data from the buffer)
;
;This routine is free to use :)
;65 bytes (or 66 bytes if gbuf is not located at 0x**40

  ld e,l
  ld h,0
  ld d,h
  add hl,hl
  add hl,de
  add hl,hl
  add hl,hl
  ld e,a
  and 7
  ld c,a
  xor e  ;essentially gets E with the bottom 3 bits reset
#if (plotSScreen&255) = 64
  inc a
  rra
  rra
  rra
  ld e,a
  ld d,plotSScreen>>8
#else
  rra
  rra
  rra
  ld e,a
  add hl,de
  ld de,plotSScreen
#endif
  add hl,de


putsprite_masked_loop:
  push bc
  xor a
  ld   d,(ix)
  ld   e,a
  sub c
  ld b,c
  ld c,$FF
  inc ix
  ld a,(ix)
  jr   z,putsprite_masked_rotdone
putsprite_masked_rot:
  scf
  rra
  rr c
  srl   d
  rr   e
  djnz putsprite_masked_rot
putsprite_masked_rotdone:
  and (hl)
  or   d
  ld   (hl),a
  inc   hl
  ld   a,(hl)
  and c
  or   e
  ld   (hl),a
  ld c,11
  add   hl,bc
  inc   ix
  pop bc
  djnz   putsprite_masked_loop
  ret


We can save even more if we don't have to rotate in '1' bits into the mask, but this requires us to use a non-traditional masking method (but better in my opinion). We can OR the mask, then XOR the sprite data. This actually gives us an additional option for masking, too. Instead of black/white/clear, we can get black/white/clear/invert. See the 'Notes' section in the code below for the rules for generating a sprite:

Code:

;Masked Sprite routine
putsprite_masked:
;Inputs:
;   (A,L) = (x,y)
;   B is height
;   IX points to the sprite data
;       first byte is the data
;       second byte is mask
;       continues, alternating like this.
;
;Outputs:
;   Mask is ORed to the buffer, then data is XORed on top of that.
;
;Destroys:
;   AF, BC, DE, HL, IX
;
;Notes:
;   To set a pixel...
;     black: mask is 1, data is 0
;     white: mask is 1, data is 1
;     clear: mask is 0, data is 0 (keeps the data from the buffer)
;     invert: mask is 0, data is 1 (inverts the data from the buffer)
;
;This routine is free to use :)
;63 bytes (or 64 bytes if gbuf is not located at 0x**40

  ld e,l
  ld h,0
  ld d,h
  add hl,hl
  add hl,de
  add hl,hl
  add hl,hl
  ld e,a
  and 7
  ld c,a
  xor e  ;essentially gets E with the bottom 3 bits reset
#if (plotSScreen&255) = 64
  inc a
  rra
  rra
  rra
  ld e,a
  ld d,plotSScreen>>8
#else
  rra
  rra
  rra
  ld e,a
  add hl,de
  ld de,plotSScreen
#endif
  add hl,de

putsprite_masked_loop:
  push bc
  xor a
  ld d,(ix)
  ld e,a
  or c
  ld b,c
  ld c,e
  inc ix
  ld a,(ix)
  jr z,putsprite_masked_rotdone
putsprite_masked_rot:
  rra
  rr c
  srl d
  rr e
  djnz putsprite_masked_rot
putsprite_masked_rotdone:
  or (hl)
  xor d
  ld (hl),a
  inc hl
  ld a,(hl)
  or c
  xor e
  ld (hl),a
  ld c,11
  add hl,bc
  inc ix
  pop bc
  djnz putsprite_masked_loop
  ret

In these two routines, note that I organize it with data first, then mask. In many cases, it could be cheaper for other operations if you put it in mask-data order. Since we are using the index registers to get the data, it's easy enough to swap the formats Smile



Big Sprites

I also made some "bigsprite" routines! These do have clipping, too. First, they use some common subroutines for computing masks and performing most of the clipping and shifting:

Code:

;133 bytes total

;This is made by Zeda, feel free to use it for whatever.
;Takes inputs for a big sprite and sets up masks and clipping
;requires 4 bytes of temporary RAM, but doesn't use SMC


spritetmp = 8000h     ;relocate this as needed! Just need 4 bytes.
sprite_width  = spritetmp+0
sprite_x      = spritetmp+1
sprite_mask0  = spritetmp+2
sprite_mask1  = spritetmp+3

bigsprite_subroutine:
;Inputs:
;     B is the X-coordinate
;     C is the Y-Coordinate
;     DE points to the sprite
;     H is the height
;     L is the width in bytes
;Outputs:
;   carry flag is set if okay to draw, nc if out-of-bounds.
;   B is height.
;   C is width.
;   HL points to the byte to start drawing at.
;   DE points to where to start sourcing the sprite data
;   (sprite_width) is the width of the sprite in bytes
;   (sprite_x) is the intitial x coordinate to begin drawing at
;   (sprite_mask0) is the left mask
;   (sprite_mask1) is the right mask
;92 bytes

;First check if the sprite is on-screen in the horizontal direction
  ld a,c
  cp 64
  jr c,+_
  add a,h
  ret nc
  ld h,a
  push hl
  xor a
  ld h,a
  sub c
  ex de,hl
  add hl,de
  dec a
  jr nz,$-2
  ex de,hl
  pop hl
  xor a
  ld c,a
_:
;Next check h+c<=64
  ld a,64
  sub c
  cp h
  jr nc,+_
  ld h,a
_:

;Make sure the height is not now 0
  ld a,h
  or a
  ret z

;Save the width and height of the sprite
  push hl               ;height,width
  ld h,b
  ld (sprite_width),hl  ;x,width
  push de               ;sprite pointer

;Set up a pointer to the routine for shifting the  routine for shifting the sprite data
  ld ixh,rshiftHA_7>>8
  ld a,h
  cpl
  and 7
  ld l,a
  add a,a
  add a,l
  add a,rshiftHA_7&255
  ld ixl,a
#if (rshiftHA_7&255)>234
  jr nc,$+4
  inc ixh
#endif

  ld a,b
  and 7
  ld de,spritemask
  add a,e
  ld e,a
#if spritemask&255>248
  jr nc,$+3
  inc d
#endif
  ld a,(de)
  ld (sprite_mask0),a
  cpl
  ld (sprite_mask1),a
;
;
  ld a,c
  add a,a
  sbc a,a
  ld h,a
  ld a,b
  ld b,h
  ld l,c
  add hl,hl
  add hl,bc
  add hl,hl
  add hl,hl
  ld c,a
  add a,a
  sbc a,a
  ld b,a
  ld a,c
  sra c
  sra c
  sra c
  add hl,bc
  ld bc,plotSScreen
  add hl,bc

  pop de
  pop bc
  ;B is height
  ;C is width
  ex de,hl
  scf
  ret

rshiftHA_7:
  rr h \ rra
  rr h \ rra
  rr h \ rra
  rr h \ rra
  rr h \ rra
  rr h \ rra
  rr h \ rra
  ex de,hl
  ld e,a
  ret

spritemask:
  .db $00,$80,$C0,$E0,$F0,$F8,$FC,$FE
call_ix:
  jp (ix)

Then you can draw a big sprite with OR logic:

Code:

bigsprite_OR:
;Inputs:
;     B is the X-coordinate
;     C is the Y-Coordinate
;     DE points to the sprite
;     H is the height
;     L is the width in bytes
;68 bytes

;Set up the clipping
  call bigsprite_subroutine
  ret nc

bigsprite_OR_loop:
  push bc   ;height,width
  push de   ;gbuf ptr
  push hl   ;sprite data pointer
  ld a,(sprite_x)
  ld c,a
  add a,8
  ld (sprite_x),a

spriteloop_OR:
  push bc
  push hl
  ld h,(hl)
  xor a
  call call_ix
  ld a,c
  cp 96
  jr nc,+_
  ld a,(hl)
  or d
  ld (hl),a
  ld a,c
_:
  inc hl
  add a,8
  cp 96
  jr nc,+_
  ld a,(sprite_mask1)
  ld a,(hl)
  or e
  ld (hl),a
_:
  ld bc,11
  add hl,bc
  ex de,hl
  pop hl
  ld a,(sprite_width)
  ld c,a
  add hl,bc
  pop bc
  djnz spriteloop_OR


  pop hl
  inc hl
  pop de
  inc de
  pop bc
  dec c
  jr nz,bigsprite_OR_loop
  ret

Or draw with XOR logic:

Code:

bigsprite_XOR:
;Inputs:
;     B is the X-coordinate
;     C is the Y-Coordinate
;     DE points to the sprite
;     H is the height
;     L is the width in bytes
;68 bytes

;Set up the clipping
  call bigsprite_subroutine
  ret nc

bigsprite_XOR_loop:
  push bc   ;height,width
  push de   ;gbuf ptr
  push hl   ;sprite data pointer
  ld a,(sprite_x)
  ld c,a
  add a,8
  ld (sprite_x),a

spriteloop_XOR:
  push bc
  push hl
  ld h,(hl)
  xor a
  call call_ix
  ld a,c
  cp 96
  jr nc,+_
  ld a,(hl)
  xor d
  ld (hl),a
  ld a,c
_:
  inc hl
  add a,8
  cp 96
  jr nc,+_
  ld a,(sprite_mask1)
  ld a,(hl)
  xor e
  ld (hl),a
_:
  ld bc,11
  add hl,bc
  ex de,hl
  pop hl
  ld a,(sprite_width)
  ld c,a
  add hl,bc
  pop bc
  djnz spriteloop_XOR


  pop hl
  inc hl
  pop de
  inc de
  pop bc
  dec c
  jr nz,bigsprite_XOR_loop
  ret

Or draw with AND logic:

Code:

bigsprite_AND:
;Inputs:
;     B is the X-coordinate
;     C is the Y-Coordinate
;     DE points to the sprite
;     H is the height
;     L is the width in bytes
;69 bytes

;Set up the clipping
  call bigsprite_subroutine
  ret nc

bigsprite_AND_loop:
  push bc   ;height,width
  push de   ;gbuf ptr
  push hl   ;sprite data pointer
  ld a,(sprite_x)
  ld c,a
  add a,8
  ld (sprite_x),a

spriteloop_AND:
  push bc
  push hl
  ld h,(hl)
  scf \ sbc a,a
  call call_ix
  ld a,c
  cp 96
  jr nc,+_
  ld a,(hl)
  and d
  ld (hl),a
  ld a,c
_:
  inc hl
  add a,8
  cp 96
  jr nc,+_
  ld a,(sprite_mask1)
  ld a,(hl)
  and e
  ld (hl),a
_:
  ld bc,11
  add hl,bc
  ex de,hl
  pop hl
  ld a,(sprite_width)
  ld c,a
  add hl,bc
  pop bc
  djnz spriteloop_AND


  pop hl
  inc hl
  pop de
  inc de
  pop bc
  dec c
  jr nz,bigsprite_AND_loop
  ret

Or draw with Erase logic:

Code:

bigsprite_Erase:
;Inputs:
;     B is the X-coordinate
;     C is the Y-Coordinate
;     DE points to the sprite
;     H is the height
;     L is the width in bytes
;67 bytes

;Set up the clipping
  call bigsprite_subroutine
  ret nc

bigsprite_Erase_loop:
  push bc   ;height,width
  push de   ;gbuf ptr
  push hl   ;sprite data pointer
  ld a,(sprite_x)
  ld c,a
  add a,8
  ld (sprite_x),a

spriteloop_Erase:
  push bc
  push hl
  ld h,(hl)
  xor a
  call call_ix
  ld a,c
  cp 96
  jr nc,+_
  ld a,d
  cpl
  and (hl)
  ld (hl),a
  ld a,c
_:
  inc hl
  add a,8
  cp 96
  jr nc,+_
  ld a,e
  cpl
  and (hl)
  ld (hl),a
_:
  ld bc,11
  add hl,bc
  ex de,hl
  pop hl
  ld a,(sprite_width)
  ld c,a
  add hl,bc
  pop bc
  djnz spriteloop_Erase

  pop hl
  inc hl
  pop de
  inc de
  pop bc
  dec c
  jr nz,bigsprite_Erase_loop
  ret

Or draw with Overwrite logic:

Code:

bigsprite_Overwrite:
;Inputs:
;     B is the X-coordinate
;     C is the Y-Coordinate
;     DE points to the sprite
;     H is the height
;     L is the width in bytes
;71 bytes

;Set up the clipping
  call bigsprite_subroutine
  ret nc

bigsprite_Overwrite_loop:
  push bc   ;height,width
  push de   ;gbuf ptr
  push hl   ;sprite data pointer
  ld a,(sprite_x)
  ld c,a
  add a,8
  ld (sprite_x),a

spriteloop_Overwrite:
  push bc
  push hl
  ld h,(hl)
  xor a
  call call_ix
  ld a,c
  cp 96
  jr nc,+_
  ld a,(sprite_mask0)
  and (hl)
  or d
  ld (hl),a
  ld a,c
_:
  inc hl
  add a,8
  cp 96
  jr nc,+_
  ld a,(sprite_mask1)
  and (hl)
  or e
  ld (hl),a
_:
  ld bc,11
  add hl,bc
  ex de,hl
  pop hl
  ld a,(sprite_width)
  ld c,a
  add hl,bc
  pop bc
  djnz spriteloop_Overwrite

  pop hl
  inc hl
  pop de
  inc de
  pop bc
  dec c
  jr nz,bigsprite_Overwrite_loop
  ret


EDIT: Formatting and added bigsprite routines
Here is a useful routine! It is a scrollable menu that can have multiple titles, but works on the graph screen.

Feel free to use any of these!

Scrollable Menu With Multiple Headings

Code:

;This routine is a little complicated to set up!
;It is intended to work in a variety of environments, so it needs a few
;variables, and 17 bytes of RAM.

;===============================================================================
;First, you'll need to define the routine that you are using to display text.
;#define Text() bcall(_VPutS)

;How tall is the text?
;#define TEXT_HEIGHT 6

;Are text coords stored with the upper byte as Y and lower byte as X ?
;If so, uncomment the following! You'll want this for VPutS, for example.
;#define TEXTCOORD_YX      ;define if textcoord is Y then X (TI-OS is Y then X for small font, large font and some custom text routines use the opposite)

;Where are the text coordinates stored?
;textcoord       = penCol

;How are you updating the LCD? Expect interrupts to be on, but you may turn them off.
; #macro LCD_Update()
;   di
;   call UpdateLCD
; #endmacro

;We need to draw rectangles, so what routines do you want to use?
;Inputs are (B,C)=(x,y), and (D,E)=(width,height)
; #macro rect_OR()
;   ;Draw a black rectangle
;   ld hl,plotSScreen
;   call rectOR
; #endmacro
;
; #macro rect_XOR()
;   ;Draw an inverted rectangle
;   ld hl,plotSScreen
;   call rectXOR
; #endmacro
;
; #macro rect_Erase()
;   ;Draw a white rectangle
;   ld hl,plotSScreen
;   call rectErase
; #endmacro
;

;Now define where vars are located, 17 bytes required in all.
;textbox_top     = $8000
;textbox_left    = textbox_top+1    ;needs to follow textbox_top
;textbox_bottom  = $8002
;textbox_right   = textbox_bottom+1 ;needs to follow textbox_bottom
;menucallptr     = $8004
;menutop         = $8006
;menucur         = $8008
;menuselection   = $800A
;menuheader_ptr  = $800C
;menuheader_coord= $800E
;menuheader_cur  = $8010   ;1 byte
;===============================================================================


menu:
;Input:
;   (B,C) = (x,y)
;   (D,E) = (width,height)
;   HL points to the header
;   IX points to the get/select code
;     If you are using the TI-OS VPutS routine, you'll need to have the textWrite flag set in sGrFlags
;Notes:
;   The header is set up as follows:
;       .db number_of_titles
;       .db "title 0",0
;       .db "title 1",0
;       .db "title 2",0
;       ...
;
;   The get/select code is passed the following information:
;       A is the index of the current title (0 is the first, 1 is the second, etc.)
;       HL is the index of the currently highlighted item
;       carry flag is reset if it needs to return a pointer to the string for the element
;           Return carry flag as set if the element is out-of-bounds.
;       carry flag is set if it needs to return a pointer to the data for the element
  ld (menucallptr),ix

;save box dimensions
  push bc

;Save the pointer to the menu headers field
  ld (menuheader_ptr),hl

;Set the menu header to header 0
  xor a
  ld (menuheader_cur),a

;establish the bottom and right edges of the textbox
  ld a,c
  add a,e
  ld (textbox_bottom),a
  ld a,b
  add a,d
  ld (textbox_right),a

; Draw the rectangle for the menu. Black border, white fill.
  rect_OR()

; Display the header
; get coords
  pop hl
  inc h
  inc h
  inc l
  push hl
#ifdef TEXTCOORD_YX
  ;need to swap the coords order
  ld a,h
  ld h,l
  ld l,a
#endif
  ld (menuheader_coord),hl
  call draw_header
  pop bc


;Before we do too much, let's establish the top-left textbox boundaries.
  ld a,TEXT_HEIGHT+2
  add a,c
  ld c,a
  ld (textbox_top),bc

  call reset_menu_cursor

;===============================================================================
; Now we have drawn the menu box, we have to populate it.
; We'll call a routine to get the n-th string to be displayed.
; Stop fetching once the next item would go at or past textbox_bottom.
; Draw at least one item.

menu_render:


;Restore the text coordinates top
  ld hl,(textbox_top)
#ifndef TEXT_PAD_TOP
  inc l
#endif
#ifdef TEXTCOORD_YX
  ;need to swap the coords order
  ld a,h
  ld h,l
  ld l,a
#endif
  ld (textcoord),hl

;rerender the items
  call menu_render_sub
menu_render_0:
;highlight the current option
  ld bc,(menuselection)
  ld hl,(textbox_bottom)
  ld a,h
  sub b
  ld d,a
  ld e,TEXT_HEIGHT+1
  dec d
  push de
  push bc
  rect_XOR()
  LCD_Update()
  pop bc
  pop de
  rect_XOR()
;wait for a keypress
_:
  in a,(4)
  and 8
  jr z,menu_get_select_err

  call getKeyDebounce
  or a
  jr z,-_
  cp 9
  scf
  jr z,menu_get_select
  cp 15
  jr z,menu_get_select_err

  call menu_arrow
  jr c,menu_render_0
  jr menu_render

menu_get_select:
  ld hl,(menucallptr)
  ld a,(menuheader_cur)
  jp (hl)

menu_render_sub:
; need to clear out the textbox
  ld bc,(textbox_top)
  ld hl,(textbox_bottom)
  ld a,h
  sub b
  ld d,a
  ld a,l
  sub c
  ld e,a
  dec e
  dec b
  rect_Erase()
  xor a
  ld bc,(menutop)

menu_render_sub_loop:
  push bc
  call menu_get_select
  pop bc
  ret c
  ld de,(textcoord)
  push de
  Text()
  pop de
#ifdef TEXTCOORD_YX
  ld a,TEXT_HEIGHT
  add a,d
  ld d,a
  ld a,(textbox_bottom)
#ifdef TEXT_PAD_TOP
  sub TEXT_HEIGHT+2
#else
  sub TEXT_HEIGHT+1
#endif
  cp d
#else
  ld a,TEXT_HEIGHT
  add a,e
  ld e,a
  ld a,(textbox_bottom)
#ifdef TEXT_PAD_TOP
  sub TEXT_HEIGHT+2
#else
  sub TEXT_HEIGHT+1
#endif
  cp e
#endif
  ld (textcoord),de
  inc bc
  jr nc,menu_render_sub_loop
  ret

menu_get_select_err:
;return a null pointer.
;A=0, HL=0
  xor a
  ld h,a
  ld l,a
  ret

menu_arrow:
;check arrow keys
  dec a
  jr z,menu_down
  dec a
  jr z,menu_left
  dec a
  jr z,menu_right
  add a,-1      ;sets the carry flag if it is not a keypress
  ret nz

;if would select oob
;   if next element exists
;       increment the menutop and rerender the menu
;else
;   move menuselection
menu_up:
  or a
  ld bc,(menucur)
  dec bc
  push bc
  call menu_get_select
  pop hl
  ret c
  ld (menucur),hl

  ld a,(menuselection)
  ld hl,(textbox_top)
  cp l
  jr z,+_
  sub TEXT_HEIGHT
  ld (menuselection),a
  scf
  ret
_:
  ld hl,(menutop)
  dec hl
  ld (menutop),hl
  ret

menu_down:
  or a
  ld bc,(menucur)
  inc bc
  push bc
  call menu_get_select
  pop hl
  ret c
  ld (menucur),hl

  ld a,(menuselection)
  add a,TEXT_HEIGHT+TEXT_HEIGHT+1
  ld hl,(textbox_bottom)
  cp l
  jr nc,+_
  sub TEXT_HEIGHT+1
  ld (menuselection),a
  scf
  ret
_:
  ld hl,(menutop)
  inc hl
  ld (menutop),hl
  ret


;These change to the next menu header
menu_left:
  ld a,(menuheader_cur)
  dec a
  jr +_
menu_right:
  ld a,(menuheader_cur)
  inc a
_:
  ld (menuheader_cur),a
  call reset_menu_cursor

draw_header:
;Set up textcoord
  ld hl,(menuheader_coord)
#ifndef TEXT_PAD_TOP
#ifdef TEXTCOORD_YX
  inc h
#else
  inc l
#endif
#endif
  ld (textcoord),hl


;Need to erase the current header area
#ifdef TEXTCOORD_YX
  ;need to swap the coords order
  ld b,l
  ld c,h
#else
  ld b,h
  ld c,l
#endif
#ifndef TEXT_PAD_TOP
  dec c
#endif

  ld de,(textbox_bottom)
  ld a,d
  sub b
  ld d,a
  dec b
  ld e,TEXT_HEIGHT+1
  rect_Erase()


;Verify that the header element is valid, wrap if needed
  ld hl,(menuheader_ptr)
  ld a,(menuheader_cur)
  cp (hl)
  jr c,+_
  inc a
  jr nz,$+5
  ;cycle to the last header
  ld a,(hl)
  dec a
  .db $FE
  ;cycle to the first header
  xor a
  ld (menuheader_cur),a
_:

;A is the header to seek
  ld bc,0   ;using CPIR, want to make sure it doesn't exit too soon!
  inc hl    ;point to the first header
  or a
  jr draw_header_loopend
draw_header_loop:
  push af
  xor a
  cpir
  pop af
  dec a
draw_header_loopend:
  jr nz,draw_header_loop

;now draw the header
  Text()
  or a
  ret

reset_menu_cursor:
  ld hl,(textbox_top)
  dec h
  ld (menuselection),hl
  ld hl,0
  ld (menutop),hl
  ld (menucur),hl
  ret


In my test, for example, I define these:

Code:

#define K_DELAY_DEFAULT 13
#define K_DELAY_ACCEL 3
#define Text() bcall(_VPutS)
#define TEXTCOORD_YX      ;comment if textcoord is X then Y (TI-OS is Y then X)
#define TEXT_PAD_TOP    ;comment if there is not a row of padding above the text
#define TEXT_HEIGHT 6
textcoord       = penCol
textbox_top     = $8000
textbox_left    = textbox_top+1
textbox_bottom  = $8002
textbox_right   = textbox_bottom+1
menucallptr     = $8004
menutop         = $8006
menucur         = $8008
menuselection   = $800A
menuheader_ptr  = $800C
menuheader_coord= $800E
menuheader_cur  = $8010   ;1 byte
k_save          = $8011   ;3 bytes
#macro rect_OR()
  ld hl,plotSScreen
  call rectOR
#endmacro
#macro rect_XOR()
  ld hl,plotSScreen
  call rectXOR
#endmacro
#macro rect_Erase()
  ld hl,plotSScreen
  call rectErase
#endmacro
#macro LCD_Update()
 bcall(_GrBufCpy)
#endmacro


And I include the following routines for getkey and rectangle drawing, and converting ints to a string:

Code:

getKeyDebounce:
k_count     = k_save+1
k_delay     = k_count+1
  ei
  halt
  call GetKey
  ld hl,k_save
  cp (hl)
  jr nz,newkeypress
;if the keys match, decrement k_count
  inc hl
  dec (hl)
  jr z,+_
  xor a
  ret
_:
  inc hl
  ld a,(hl)
  sub K_DELAY_ACCEL+1
  jr nc,+_
  xor a
_:
  inc a
  ld (hl),a
  dec hl
  ld (hl),a
  dec hl
  ld a,(hl)
  ret
newkeypress:
  ld (hl),a
  inc hl
  ld (hl),K_DELAY_DEFAULT
  inc hl
  ld (hl),K_DELAY_DEFAULT
  ret
getKey:
;Input:
;   Expects that interrupts are either off or won't interfere with port 1.
;Outputs:
;   A is a value from 0 to 56 that is the keypress
;Destroys:
;   C, DE
;     C is actually 1
;     E is actually a copy of the keypress
  ld c,1
  ld de,$FEFF
_:
  out (c),d
  rlc d
  ret nc
  inc e
  in a,(1)
  inc a
  jr z,-_
  dec a
  sla e
  sla e
  sla e
_:
  inc e
  rra
  jr c,-_
  ld a,e
  ret

rectOR:
;  (B,C) = (x,y) signed
;  (D,E) = (w,h) unsigned
;  HL points to buf
    push hl
    call rectSub
    pop ix
    ret nc
    ex de,hl
    add ix,de
    ex de,hl
    push ix
    pop hl
    dec b
    jp m,orrect0
    inc b
or_rect_loop:
    push bc
    push hl
    ld a,(hl) \ or d \ ld (hl),a \ inc hl
    dec b
    jr z,$+8
    ld c,-1
    ld (hl),c \ inc hl \ djnz $-2
    ld a,(hl) \ or e \ ld (hl),a
    ld bc,12
    pop hl
    add hl,bc
    pop bc
    dec c
    jr nz,or_rect_loop
    ret
orrect0:
    ld a,d
    and e
    ld b,c
    ld c,a
    ld de,12
    ld a,c
    or (hl)
    ld (hl),a
    add hl,de
    djnz $-4
    ret

rectXOR:
;  (B,C) = (x,y) signed
;  (D,E) = (w,h) unsigned
;  HL points to buf
    push hl
    call rectSub
    pop ix
    ret nc
    ex de,hl
    add ix,de
    ex de,hl
    push ix
    pop hl
    dec b
    jp m,xorrect0
    inc b
xor_rect_loop:
    push bc
    push hl
    ld a,(hl) \ xor d \ ld (hl),a \ inc hl
    dec b
    jr z,$+8
    ld a,(hl) \ cpl \ ld (hl),a \ inc hl \ djnz $-4
    ld a,(hl) \ xor e \ ld (hl),a
    ld bc,12
    pop hl
    add hl,bc
    pop bc
    dec c
    jr nz,xor_rect_loop
    ret
xorrect0:
    ld a,d
    and e
    ld b,c
    ld c,a
    ld de,12
    ld a,c
    xor (hl)
    ld (hl),a
    add hl,de
    djnz $-4
    ret

rectErase:
;  (B,C) = (x,y) signed
;  (D,E) = (w,h) unsigned
;  HL points to buf
    push hl
    call rectSub
    pop ix
    ret nc
    ex de,hl
    add ix,de
    ex de,hl
    push ix
    pop hl
    ld a,d
    cpl
    ld d,a
    ld a,e
    cpl
    ld e,a
    dec b
    jp m,eraserect0
    inc b
erase_rect_loop:
    push bc
    push hl
    ld a,(hl) \ and d \ ld (hl),a \ inc hl
    dec b
    jr z,$+7
    xor a
    ld (hl),a \ inc hl \ djnz $-2
    ld a,(hl) \ and e \ ld (hl),a
    ld bc,12
    pop hl
    add hl,bc
    pop bc
    dec c
    jr nz,erase_rect_loop
    ret
eraserect0:
    ld a,d
    xor e
    ld b,c
    ld c,a
    ld de,12
    ld a,c
    and (hl)
    ld (hl),a
    add hl,de
    djnz $-4
    ret

rectsub:
;(B,C) = (x,y) signed
;(D,E) = (w,h) unsigned
;Output:
;  Start Mask  D
;  End Mask    E
;  Byte width  B
;  Height      C
;  buf offset  HL
  bit 7,b
  jr z,+_
  ;Here, b is negative, so we have to add width to x.
  ;If the result is still negative, the entire box is out of bounds, so return
  ;otherwise, set width=newvalue,b=0
  ld a,d
  add a,b
  ret nc
  ld d,a
  ld b,0
_:
  bit 7,c
  jr z,+_
  ld a,e
  add a,c
  ret nc
  ld e,a
  ld c,0
_:
;We have clipped all negative areas.
;Now we need to verify that (x,y) are on the screen.
;If they aren't, then the whole rectangle is off-screen so no need to draw.
  ld a,b
  cp 96
  ret nc
  ld a,c
  cp 64
  ret nc
;Let's also verfiy that height and width are non-zero:
  ld a,d
  or a
  ret z
  ld a,e
  or a
  ret z
;Now we need to clip the width and height to be in-bounds
  add a,c
  cp 65
  jr c,+_
  ;Here we need to set e=64-c
  ld a,64
  sub c
  ld e,a
_:
  ld a,d
  add a,b
  cp 97
  jr c,+_
  ;Here we need to set d=96-b
  ld a,96
  sub b
  ld d,a
_:
;B is starting X
;C is starting Y
;D is width
;E is height

  push bc
  ld l,b
  ld a,b
  and 7
  ld b,a
  ld a,-1
  jr z,+_
  rra \ djnz $-1
_:
  inc a
  cpl
  ld h,a    ;start mask

  ld a,l
  add a,d
  and 7
  ld b,a
  ld a,-1
  jr z,+_
  rra \ djnz $-1
_:
  inc a
  ld l,a  ;end mask
  ex (sp),hl
  ld a,h
  ld h,b
  add hl,hl
  add hl,bc
  add hl,hl
  add hl,hl
  ld b,a
  rrca
  rrca
  rrca
  and 31
  add a,l
  ld l,a
  jr nc,$+3
  inc h

;B is the starting x, D is width
;Only A,B,D,E are available
  ld a,b
  add a,d
  and $F8
  ld d,a

  ld a,b
  and $F8
  ld b,a
  ld a,d
  sub b
  rrca
  rrca
  rrca
  ld b,a
  ld c,e
  pop de
  scf
  ret

uitoa_16:
;Input:
;   DE is the number to convert
;   HL points to where to write the ASCII string (up to 6 bytes needed).
;Output:
;   HL points to the null-terminated ASCII string
;      NOTE: This isn't necessarily the same as the input HL.
  push de
  push bc
  push af
  ex de,hl

  ld bc,-10000
  ld a,'0'-1
  inc a \ add hl,bc \ jr c,$-2
  ld (de),a
  inc de

  ld bc,1000
  ld a,'9'+1
  dec a \ add hl,bc \ jr nc,$-2
  ld (de),a
  inc de

  ld bc,-100
  ld a,'0'-1
  inc a \ add hl,bc \ jr c,$-2
  ld (de),a
  inc de

  ld a,l
  ld h,'9'+1
  dec h \ add a,10 \ jr nc,$-3
  add a,'0'
  ex de,hl
  ld (hl),d
  inc hl
  ld (hl),a
  inc hl
  ld (hl),0

;Now strip the leading zeros
  ld c,-6
  add hl,bc
  ld a,'0'
  inc hl \ cp (hl) \ jr z,$-2

;Make sure that the string is non-empty!
  ld a,(hl)
  or a
  jr nz,+_
  dec hl
_:

  pop af
  pop bc
  pop de
  ret



And I call it with:

Code:

  ld ix,menulookup
  ld bc,$1C02
  ld de,$283B
  ld hl,menhead
  set textWrite, (IY + sGrFlags)
  xor a
  ld (kbdScanCode),a

  call menu

  xor a
  ld (kbdScanCode),a
  res textWrite, (IY + sGrFlags)

  ...

menhead:
  .db 3
  .db "Strings",$05,0
  .db $CF,"Pictures",$05,0
  .db $CF,"GDB",0

menulookup:
  jr c,menu_select
menu_get:
  ld d,a
  ld a,b
  add a,-1
  ret c
  inc bc
  ld a,c
  sub 10
  jr nz,+_
  ld b,a
  ld c,a
_:
  push bc
  ld hl,s_str
  dec d
  jr nz,+_
  ld hl,s_pic
_:
  dec d
  jr nz,+_
  ld hl,s_GDB
_:
  ld de,OP1
  ldi
  ldi
  ldi
  ex de,hl
  pop de
  push hl
  call uitoa_16
  pop de
  ;de is where to write
  call strcopy
  ld hl,OP1
  xor a
  ret
menu_select:
  ld hl,(menucur-1)
  ld l,$AA
  dec a \ jr nz,$+4 \ ld l,$60
  dec a \ ret nz \ ld l,$61
  ret
s_str: .db "Str"
s_pic: .db "Pic"
s_GDB: .db "GDB"


It's a lot of overhead if your project doesn't need those rectangle routines or the getkey routines, but, you can modify those to use the built-in routines (it's just smoother this way). Either way, check out the example:


EDIT: Updated to define if there is padding above the text or not. Here is a screenshot of using the large font (save bandwidth, don't need a gif for this one Razz)
The scrollable menu looks great!
sqrtHLIX
This is a 32-bit integer square root routine. It uses the unrolled sqrtHL routine at the bottom of this post as a subroutine. Note that this routine makes use of the undocumented SLL instruction, and so you might need to modify it for some devices. You can replace it with scf \ rl * at the cost of one byte and 4cc.

Code:

sqrtHLIX:
;Input: HLIX
;Output: DE is the sqrt, AHL is the remainder
;speed: 751+6{0,6}+{0,3+{0,18}}+{0,38}+sqrtHL
;min: 1103
;max: 1237
;avg: 1165.5
;166 bytes

  call sqrtHL   ;expects returns A as sqrt, HL as remainder, D = 0
  add a,a
  ld e,a
  rl d

  ld a,ixh
  sll e \ rl d
  add a,a \ adc hl,hl
  add a,a \ adc hl,hl
  sbc hl,de
  jr nc,+_
  add hl,de
  dec e
  .db $FE     ;start of `cp *`
_:
  inc e

  sll e \ rl d
  add a,a \ adc hl,hl
  add a,a \ adc hl,hl
  sbc hl,de
  jr nc,+_
  add hl,de
  dec e
  .db $FE     ;start of `cp *`
_:
  inc e

  sll e \ rl d
  add a,a \ adc hl,hl
  add a,a \ adc hl,hl
  sbc hl,de
  jr nc,+_
  add hl,de
  dec e
  .db $FE     ;start of `cp *`
_:
  inc e

  sll e \ rl d
  add a,a \ adc hl,hl
  add a,a \ adc hl,hl
  sbc hl,de
  jr nc,+_
  add hl,de
  dec e
  .db $FE     ;start of `cp *`
_:
  inc e

;Now we have four more iterations
;The first two are no problem
  ld a,ixl
  sll e \ rl d
  add a,a \ adc hl,hl
  add a,a \ adc hl,hl
  sbc hl,de
  jr nc,+_
  add hl,de
  dec e
  .db $FE     ;start of `cp *`
_:
  inc e

  sll e \ rl d
  add a,a \ adc hl,hl
  add a,a \ adc hl,hl
  sbc hl,de
  jr nc,+_
  add hl,de
  dec e
  .db $FE     ;start of `cp *`
_:
  inc e

sqrt32_iter15:
;On the next iteration, HL might temporarily overflow by 1 bit
  sll e \ rl d      ;sla e \ rl d \ inc e
  add a,a
  adc hl,hl
  add a,a
  adc hl,hl       ;This might overflow!
  jr c,sqrt32_iter15_br0
;
  sbc hl,de
  jr nc,+_
  add hl,de
  dec e
  jr sqrt32_iter16
sqrt32_iter15_br0:
  or a
  sbc hl,de
_:
  inc e

;On the next iteration, HL is allowed to overflow, DE could overflow with our current routine, but it needs to be shifted right at the end, anyways
sqrt32_iter16:
  add a,a
  ld b,a        ;either 0x00 or 0x80
  adc hl,hl
  rla
  adc hl,hl
  rla
;AHL - (DE+DE+1)
  sbc hl,de \ sbc a,b
  inc e
  or a
  sbc hl,de \ sbc a,b
  ret p
  add hl,de
  adc a,b
  dec e
  add hl,de
  adc a,b
  ret

This routine outputs a remainder, which is useful to have on hand if you want to build higher precision routines. The two routines combined use 260 bytes in total, but that's the price of quick maffs.
Here is a neat circle routine that I made:

Code:

;Written by Zeda Thomas, free to use.

;This draws a circle centered at 8-bit coordinates and with radius up to 127.
;IX points to a `plot` routine that takes (B,C)=(x,y) as input and does something
;with it, like plot the pixel a certain color, or plot a "big" pixel, or whatever.
;   plot
;     Takes coordinates, (B,C) = (x,y) and plots the point.
;
;For example, on the TI-83+/84+/SE the plot routine might look like:
; plot:
;   call getpixelloc
;   ret nc            ;Exit if the coordinates are out-of-bounds
;   or (hl)
;   ld (hl),a
;   ret
;
;
; Required subroutines:
;     call_ix:
;       jp (ix)

circle:
;Input:
; (B,C) is the center (x,y)
; D is the radius, unsigned, less than 128 (0 or greater than 128 just quits).
; IX points to a `plot` routine that takes (B,C)=(x,y) as input.
  ld a,d
  add a,a
  ret c
  ret z
  ld l,d
  dec a
  ld e,a
  dec a        ;if the pixel is only 1 wide, just plot the point
  jp z,call_ix ;Jump to the plot routine
  xor a
  ld h,-1
  ld d,1
  scf     ;skip the first plot
circleloop:
  call nc,plot4
  inc h
  sub d
  inc d
  inc d
  jr nc,circleloop
_:
  dec l
  call plot4
  add a,e
  dec e
  ret z
  dec e
  jr nc,-_
  jp circleloop


plot4:
;BC is center
;HL is x,y

  push de
  push af
  push hl
  push bc

;If H is 0, or L is 0, we need to draw only half
  push hl

  ld a,b
  sub h
  ld b,a
  add a,h
  add a,h
  ld h,a

  ld a,c
  sub l
  ld c,a
  add a,l
  add a,l
  ld l,a

;B is x0-x
;C is y0-y
;H is x0+x
;L is y0+y


;plot(x0-x,y0-y)
;plot(x0+x,y0+y)
  push bc
  push hl
  call call_ix    ;call the plot routine
  pop bc
  push bc
  call call_ix    ;call the plot routine

;now swap the y coords
  pop hl
  pop bc
  ld a,l
  ld l,c
  ld c,a
  pop de
  xor a
  cp d
  jr z,+_
  cp e
  jr z,+_


;plot(x0-x,y0+y)
;plot(x0+x,y0-y)
  push hl
  call call_ix    ;call the plot routine
  pop bc
  call call_ix    ;call the plot routine
_:

  pop bc
  pop hl
  pop af
  pop de
  ret

The really cool feature about this is that you can define a custom plot routine pointed to by IX, so it isn't TI-specific, and you can do all sorts of wonky things like:
Draw 2x2 pixels:


Code:

;calling with `ld ix,pixelOn_2x2`

pixelOn_2x2:
  sla b
  ret c
  sla c
  ret c
  push bc
  call pixelOn
  pop bc
  inc b
  push bc
  call pixelOn
  pop bc
  inc c
  push bc
  call pixelOn
  pop bc
  dec b
  jp pixelOn


Or draw a circle whose "pixels" are circles:


Code:

;calling with `ld ix,pixelOn_circle`

pixelOn_circle:
  ld a,b
  cp 32
  ret nc
  add a,a
  add a,a
  ld b,a
  ld a,c
  cp 32
  ret nc
  add a,a
  add a,a
  ld c,a
  ld d,4
  push ix    ;need to save IX!
  ld ix,pixelOn
  call circle
  pop ix
  ret


EDIT: I inlined some subroutines because there was no reason to have them called. It was a waste of clock cycles and space!

EDIT: Have a separate, filled rectangle routine!
Note that if you pass the same arguments as the regular circle routine, this only draws the inside part and skips the border.

Code:

;Written by Zeda Thomas, free to use.

;This draws the fill of a circle centered at 8-bit coordinates and with radius
;up to 127.
;IX points to a `horizontal line` routine that takes E=x, A=y, D=width as input
;and does something with it, like plot a horizontal line.
;
; For example, on the ti-83+/84+/SE calculators, you might have:
;     horizontal_line:
;       ld b,e
;       ld c,a
;       ld e,1
;       ld hl,gbuf
;       jp rectOR

; Required subroutines:
;     call_ix:
;       jp (ix)

filledcircle:
;Input:
; (B,C) is the center (x,y)
; D is the radius, unsigned, less than 128 (0 or greater than 128 just quits).
; IX points to a `plot` routine that takes (B,C)=(x,y) as input.
  ld a,d
  add a,a
  ret c
  ret z
  ld l,d
  dec a
  ld e,a
  xor a
  ld h,-1
  ld d,1
filledcircleloop:
  ; call c,fillcircle_plot
  inc h
  sub d
  inc d
  inc d
  jr nc,filledcircleloop
_:
  dec l
  call fillcircle_plot
  add a,e
  dec e
  ret z
  dec e
  jr nc,-_
  jp filledcircleloop
fillcircle_plot:
  inc h
  dec h
  ret z
  push hl
  push de
  push bc
  push af
  dec h
  ld a,b
  sub h
  ld e,a
  ld d,h
  sll d   ;aka `slia`, undocumented

  ld a,l
  or a
  ld h,c
  jr z,+_
  add a,h
  push de
  push hl
  call nz,call_ix
  pop hl
  pop de
_:
  ld a,h
  sub l
  call call_ix
  pop af
  pop bc
  pop de
  pop hl
  ret

Here is an example screenshot of when I used an inverted horizontal line routine:
  
Register to Join the Conversation
Have your own thoughts to add to this or any other topic? Want to ask a question, offer a suggestion, share your own programs and projects, upload a file to the file archives, get help with calculator and computer programming, or simply chat with like-minded coders and tech and calculator enthusiasts via the site-wide AJAX SAX widget? Registration for a free Cemetech account only takes a minute.

» Go to Registration page
Page 8 of 8
» All times are GMT - 5 Hours
 
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum

 

Advertisement