*bump* Though I am loath to re-use this thread, I have some TI-84+CSE sprite routines that need to be hella optimized. Who has the time and inclination to take a look? I can provide more details about inputs, outputs, and functionality as needed.
Code: DrawSprite_CommonPreface:
; Shared setup used by every DrawSprite_* routine.
; In:   L  = Y coordinate (H is forced to 0 here)
;       B  = X coordinate (doubled before it is written to the LCD)
;       DE -> sprite header: width byte, height byte, then pixel data
; Out:  (SpriteWidth), (SpriteHeight) = the two header bytes
;       DE -> first byte of pixel data
;       B = X coordinate, C = width byte, HL = 2*X, A = $21
; Write_Display_Control is defined elsewhere; it is called with an LCD
; register number in A and the value in HL. BC is saved around the second
; pair of calls, so it presumably clobbers C -- confirm against its source.
; No clipping is performed in this routine.
	ld h,0				; HL = Y
	ld a,$50			; set minimum Y
	call Write_Display_Control
	ld a,$20			; set current Y
	call Write_Display_Control

	ld a,(de)			; header byte 0: width
	ld c,a
	ld (SpriteWidth),a
	inc de
	ld a,(de)			; header byte 1: height
	ld (SpriteHeight),a
	inc de				; DE -> pixel data

	push bc				; keep X (B) and width (C) for the caller
	ld h,0
	ld l,b
	add hl,hl			; HL = 2*X
	ld a,$52			; set minimum X
	call Write_Display_Control
	ld a,$21			; set current X
	call Write_Display_Control
	pop bc
	ret
; ix -> palette, b = x, l = y, de -> sprite
; DrawSprite_1Bit: draw a 1-bit-per-pixel sprite through a 2-entry palette.
; Each palette entry is two bytes, sent to port $11 in memory order.
; A clear bit selects the entry at IX+0, a set bit the entry at IX+2.
; Bits are consumed most-significant first, 8 pixels per sprite byte.
; Clobbers A, BC, DE, HL (plus whatever Write_Display_Control clobbers).
DrawSprite_1Bit:
	call DrawSprite_CommonPreface	; returns B = x, C = width in bytes
	ld a,b				; set maximum X
	add a,c
	add a,c
	add a,c
	add a,c				; A = x + 4*width
	ld l,a
	ld h,0
	add hl,hl
	dec hl				; HL = 2*x + 8*width - 1
	ld a,$53
	call Write_Display_Control
	ld a,$22			; select LCD register $22 before streaming pixels
	out ($10),a
	out ($10),a
	ld a,(SpriteHeight)
	ld b,a				; B = rows remaining
	ld c,$11			; OUTI port; loop-invariant, so it is set once here
Draw_Sprite_1Bit_PackedLoop:
	push bc				; save row counter (C = $11 rides along)
	ld a,(SpriteWidth)
	ld b,a				; B = bytes remaining in this row
Draw_Sprite_1Bit_PackedLine:
; Nominal Z80 timing of the bit loop below: 86 T-states for a clear bit and
; 93 for a set bit (the earlier rlc a / and 1 form took 112 / 119).
	push bc				;11 save byte counter; OUTI decrements B
	ld b,8*3			;7  each pixel costs 3 decrements of B:
					;   two OUTIs plus the DJNZ -> 8 pixels
	ld a,(de)			;7
Draw_Sprite_1Bit_PackedLine_Bit:
	rlca				;4  next pixel bit -> carry; A keeps rotating
	push ix				;15
	pop hl				;10 HL -> palette entry 0 (flags untouched)
	jr nc,Draw_Sprite_1Bit_PackedLine_BitLow	;12 taken / 7 not taken
	inc hl				;6
	inc hl				;6  HL -> palette entry 1
Draw_Sprite_1Bit_PackedLine_BitLow:
	outi				;16
	outi				;16
	djnz Draw_Sprite_1Bit_PackedLine_Bit	;13 if jump taken
	inc de				;6
	pop bc				;10
	djnz Draw_Sprite_1Bit_PackedLine	;13 if jump taken
	pop bc				;10
	djnz Draw_Sprite_1Bit_PackedLoop	;13 if jump taken
	ret
; ix -> palette, b = x, l = y, de -> sprite
; DrawSprite_2Bit: draw a 2-bit-per-pixel sprite through a 4-entry palette.
; Each palette entry is two bytes, sent to port $11 in memory order.
; Every sprite byte holds 4 pixels, most-significant pair first.
; Clobbers A, BC, DE, HL (plus whatever Write_Display_Control clobbers).
DrawSprite_2Bit:
	call DrawSprite_CommonPreface	; returns B = x, C = width in bytes
	ld a,b				; set maximum X
	add a,c
	add a,c				; A = x + 2*width
	ld l,a
	ld h,0
	add hl,hl
	dec hl				; HL = 2*x + 4*width - 1
	ld a,$53
	call Write_Display_Control
	ld a,$22			; select LCD register $22 before streaming pixels
	out ($10),a
	out ($10),a
	ld a,(SpriteHeight)
	ld b,a				; B = rows remaining
	ld c,$11			; OUTI port; loop-invariant, so it is set once here
draw_packed_loop_2bit:
	push bc				; save row counter (C = $11 rides along)
	ld a,(SpriteWidth)
	ld b,a				; B = bytes remaining in this row
draw_packed_line_2bit:
; Nominal Z80 timing: 436 T-states per byte, 109 per pixel.
; The earlier form (BC-based add, ld c,$11 per pixel, rra x5) took 480.
; The palette offset is added with 8-bit arithmetic so B and C are never
; disturbed except by OUTI, which only decrements B.
	push bc				;11 save byte counter; OUTI decrements B

	ld a,(de)			;7  pixel 0: bits 7-6
	rlca				;4
	rlca				;4
	rlca				;4  bits 7-6 now sit in bits 2-1
	and %110			;7  A = 2 * colour index
	push ix				;15
	pop hl				;10 HL -> palette
	add a,l				;4
	ld l,a				;4
	adc a,h				;4
	sub l				;4
	ld h,a				;4  HL = palette + 2*index
	outi				;16
	outi				;16

	ld a,(de)			;7  pixel 1: bits 5-4
	rrca				;4
	rrca				;4
	rrca				;4  bits 5-4 now sit in bits 2-1
	and %110			;7
	push ix				;15
	pop hl				;10
	add a,l				;4
	ld l,a				;4
	adc a,h				;4
	sub l				;4
	ld h,a				;4
	outi				;16
	outi				;16

	ld a,(de)			;7  pixel 2: bits 3-2
	rrca				;4  bits 3-2 now sit in bits 2-1
	and %110			;7
	push ix				;15
	pop hl				;10
	add a,l				;4
	ld l,a				;4
	adc a,h				;4
	sub l				;4
	ld h,a				;4
	outi				;16
	outi				;16

	ld a,(de)			;7  pixel 3: bits 1-0
	rlca				;4  bits 1-0 now sit in bits 2-1
	and %110			;7
	push ix				;15
	pop hl				;10
	add a,l				;4
	ld l,a				;4
	adc a,h				;4
	sub l				;4
	ld h,a				;4
	outi				;16
	outi				;16

	inc de				;6
	pop bc				;10
	djnz draw_packed_line_2bit	;13 if jump taken
	pop bc				;10
	djnz draw_packed_loop_2bit	;13 if jump taken
	ret
; ix -> palette, b = x, l = y, de -> sprite
; DrawSprite_4Bit_Enlarge: draw a 4-bit-per-pixel sprite at double size.
; Each source row is sent twice (DE is rewound between the two passes) and
; the line subroutine sends every pixel twice.
DrawSprite_4Bit_Enlarge:
	call DrawSprite_CommonPreface	; returns B = x, C = width in bytes
	ld a,c				; set maximum X
	add a,a				; double width for 2x enlargement
	add a,b				; A = x + 2*width
	ld l,a
	ld h,0
	add hl,hl
	dec hl				; HL = 2*x + 4*width - 1
	ld a,$53
	call Write_Display_Control
	ld a,$22			; select LCD register $22 before streaming pixels
	out ($10),a
	out ($10),a
	ld hl,SpriteHeight
	ld b,(hl)			; B = source rows remaining
Draw_Sprite_4Bit_Enlarge_PackedLoop:
	push bc				; the line subroutine uses B as its own counter
	push de				; remember the start of this source row
	call Draw_Sprite_4Bit_Enlarge_PackedLine_Sub	; first copy of the row
	pop de				; rewind to the start of the row
	call Draw_Sprite_4Bit_Enlarge_PackedLine_Sub	; second copy; DE ends on next row
	pop bc				;10
	djnz Draw_Sprite_4Bit_Enlarge_PackedLoop	;13 if jump taken
	ret
; Draw_Sprite_4Bit_Enlarge_PackedLine_Sub: send one row of a 4-bit sprite,
; writing every pixel twice (horizontal 2x enlargement).
; In:   DE -> first byte of the row, IX -> palette (2 bytes per entry),
;       (SpriteWidth) = row length in bytes
; Out:  DE -> byte after the row, B = 0, C = $11
; Clobbers A, HL.
Draw_Sprite_4Bit_Enlarge_PackedLine_Sub:
	ld a,(SpriteWidth)
	ld b,a				; B = bytes remaining in this row
	ld c,$11			; OUTI port; set once per row instead of per pixel
Draw_Sprite_4Bit_Enlarge_PackedLine:
; C must be $11 on entry to this label. The palette offset is added with
; 8-bit arithmetic so that B and C are only ever touched by OUTI (B--).
	push bc				;11 save byte counter; OUTI decrements B

	ld a,(de)			;7  high nibble
	rrca				;4
	rrca				;4
	rrca				;4  bits 7-4 now sit in bits 4-1
	and %11110			;7  A = 2 * colour index
	push ix				;15
	pop hl				;10 HL -> palette
	add a,l				;4
	ld l,a				;4
	adc a,h				;4
	sub l				;4
	ld h,a				;4  HL = palette + 2*index
	outi				;16
	outi				;16 ;First pixel copy
	dec hl				;6
	dec hl				;6  back to the start of the same entry
	outi				;16
	outi				;16 ;Second pixel copy

	ld a,(de)			;7  low nibble
	rlca				;4  bits 3-0 now sit in bits 4-1
	and %11110			;7
	push ix				;15
	pop hl				;10
	add a,l				;4
	ld l,a				;4
	adc a,h				;4
	sub l				;4
	ld h,a				;4
	outi				;16
	outi				;16 ;First pixel copy
	dec hl				;6
	dec hl				;6
	outi				;16
	outi				;16 ;Second pixel copy

	inc de				;6
	pop bc				;10
	djnz Draw_Sprite_4Bit_Enlarge_PackedLine	;13 if jump taken
	ret
; ix -> palette, b = x, l = y, de -> sprite
; DrawSprite_4Bit: draw a 4-bit-per-pixel sprite through a 16-entry palette.
; Each palette entry is two bytes, sent to port $11 in memory order.
; Every sprite byte holds 2 pixels, high nibble first.
; Clobbers A, BC, DE, HL (plus whatever Write_Display_Control clobbers).
DrawSprite_4Bit:
	call DrawSprite_CommonPreface	; returns B = x, C = width in bytes
	ld a,b				; set maximum X
	add a,c				; A = x + width
	ld l,a
	ld h,0
	add hl,hl
	dec hl				; HL = 2*x + 2*width - 1
	ld a,$53
	call Write_Display_Control
	ld a,$22			; select LCD register $22 before streaming pixels
	out ($10),a
	out ($10),a
	ld a,(SpriteHeight)
	ld b,a				; B = rows remaining
	ld c,$11			; OUTI port; loop-invariant, so it is set once here
Draw_Sprite_4Bit_PackedLoop:
	push bc				; save row counter (C = $11 rides along)
	ld a,(SpriteWidth)
	ld b,a				; B = bytes remaining in this row
Draw_Sprite_4Bit_PackedLine: ; PAD's fixed-palette version: 134 clocks per loop, 67 clocks per pixel
; This variable-palette version: 238 T-states per loop, 119 per pixel
; (nominal Z80 timing). The earlier form with a BC-based add and a per-pixel
; ld c,$11 took 256; its "234" note left out ld c,$11 and counted push ix
; as 11 instead of 15.
	push bc				;11 save byte counter; OUTI decrements B

	ld a,(de)			;7  high nibble
	rrca				;4
	rrca				;4
	rrca				;4  bits 7-4 now sit in bits 4-1
	and %11110			;7  A = 2 * colour index
	push ix				;15
	pop hl				;10 HL -> palette
	add a,l				;4
	ld l,a				;4
	adc a,h				;4
	sub l				;4
	ld h,a				;4  HL = palette + 2*index
	outi				;16
	outi				;16

	ld a,(de)			;7  low nibble
	rlca				;4  bits 3-0 now sit in bits 4-1
	and %11110			;7
	push ix				;15
	pop hl				;10
	add a,l				;4
	ld l,a				;4
	adc a,h				;4
	sub l				;4
	ld h,a				;4
	outi				;16
	outi				;16

	inc de				;6
	pop bc				;10
	djnz Draw_Sprite_4Bit_PackedLine	;13 if jump taken
	pop bc				;10
	djnz Draw_Sprite_4Bit_PackedLoop	;13 if jump taken
	ret