A faster BlockCopy (valid only if n > 0):
/*
 * BlockCopy - copy n bytes from Source to Dest with a single ldir
 * block-transfer instruction instead of a byte-by-byte loop.
 *
 * Dest   : destination pointer, fetched from the stack at XSP+0x4 into XDE.
 * Source : source pointer, fetched from the stack at XSP+0x8 into XHL.
 * n      : byte count, fetched from the stack at XSP+0xc into BC.
 *
 * Precondition: n > 0. There is no zero check in this version.
 * NOTE(review): with BC == 0, ldir presumably runs through the full 16-bit
 * count instead of copying nothing - confirm against the CPU manual.
 * NOTE(review): the fixed XSP offsets assume the compiler emits no prologue
 * that moves XSP before these statements - confirm for the build settings.
 */
void BlockCopy(u8 * Dest, const u8 * Source, u16 n)
{
/* XDE <- Dest (first argument on the stack) */
__asm(" ld XDE,(XSP+0x4)");
/* XHL <- Source (second argument) */
__asm(" ld XHL,(XSP+0x8)");
/* BC <- n, the repeat count used by ldir */
__asm(" ld BC,(XSP+0xc)");
/* block copy from (XHL+) to (XDE+), repeated under control of BC */
__asm(" ldir (XDE+),(XHL+)");
}
The same routine with an n == 0 check added:
; Guarded variant: the count is loaded and tested first, so the routine
; returns before touching the pointers when n == 0.
ld BC,(XSP+0xc)
; compare the count with zero and return immediately if it is zero
cp BC,0
ret z
; count is non-zero: load the pointers from (XSP+0x4) and (XSP+0x8), then block-copy
ld XDE,(XSP+0x4)
ld XHL,(XSP+0x8)
ldir (XDE+),(XHL+)
instead of the compiler-generated asm:
; Compiler-generated code for BlockCopy, shown for comparison:
; a loop that moves one byte per iteration.
_BlockCopy:
; load the three stack arguments: XHL <- (XSP+0x4), XDE <- (XSP+0x8), BC <- (XSP+0xc)
ld XHL,(XSP+0x4)
ld XDE,(XSP+0x8)
ld BC,(XSP+0xc)
; keep the pre-decrement count in WA, decrement BC, and return if the saved count was 0
ld WA,BC
dec 0x1,BC
cp WA,0x0
ret eq
L78: ;2
; move one byte from (XDE) to (XHL) through A, post-incrementing both pointers by 1
ld A,(XDE+:1)
ld (XHL+:1),A
; same test as above: loop again while the count before this decrement was non-zero
ld WA,BC
dec 0x1,BC
cp WA,0x0
j ne,L78
L77: ;1
ret
However, memcpy from the compiler library is optimized even further: it uses ldirw (https://github.com/sodthor/ngpcdev/blob/...2/memcpy.c)
/*
 * BlockCopy - copy n bytes from Source to Dest with a single ldir
 * block-transfer instruction instead of a byte-by-byte loop.
 *
 * Dest   : destination pointer, fetched from the stack at XSP+0x4 into XDE.
 * Source : source pointer, fetched from the stack at XSP+0x8 into XHL.
 * n      : byte count, fetched from the stack at XSP+0xc into BC.
 *
 * Precondition: n > 0. There is no zero check in this version.
 * NOTE(review): with BC == 0, ldir presumably runs through the full 16-bit
 * count instead of copying nothing - confirm against the CPU manual.
 * NOTE(review): the fixed XSP offsets assume the compiler emits no prologue
 * that moves XSP before these statements - confirm for the build settings.
 */
void BlockCopy(u8 * Dest, const u8 * Source, u16 n)
{
/* XDE <- Dest (first argument on the stack) */
__asm(" ld XDE,(XSP+0x4)");
/* XHL <- Source (second argument) */
__asm(" ld XHL,(XSP+0x8)");
/* BC <- n, the repeat count used by ldir */
__asm(" ld BC,(XSP+0xc)");
/* block copy from (XHL+) to (XDE+), repeated under control of BC */
__asm(" ldir (XDE+),(XHL+)");
}
The same routine with an n == 0 check added:
; Guarded variant: the count is loaded and tested first, so the routine
; returns before touching the pointers when n == 0.
ld BC,(XSP+0xc)
; compare the count with zero and return immediately if it is zero
cp BC,0
ret z
; count is non-zero: load the pointers from (XSP+0x4) and (XSP+0x8), then block-copy
ld XDE,(XSP+0x4)
ld XHL,(XSP+0x8)
ldir (XDE+),(XHL+)
instead of the compiler-generated asm:
; Compiler-generated code for BlockCopy, shown for comparison:
; a loop that moves one byte per iteration.
_BlockCopy:
; load the three stack arguments: XHL <- (XSP+0x4), XDE <- (XSP+0x8), BC <- (XSP+0xc)
ld XHL,(XSP+0x4)
ld XDE,(XSP+0x8)
ld BC,(XSP+0xc)
; keep the pre-decrement count in WA, decrement BC, and return if the saved count was 0
ld WA,BC
dec 0x1,BC
cp WA,0x0
ret eq
L78: ;2
; move one byte from (XDE) to (XHL) through A, post-incrementing both pointers by 1
ld A,(XDE+:1)
ld (XHL+:1),A
; same test as above: loop again while the count before this decrement was non-zero
ld WA,BC
dec 0x1,BC
cp WA,0x0
j ne,L78
L77: ;1
ret
However, memcpy from the compiler library is optimized even further: it uses ldirw (https://github.com/sodthor/ngpcdev/blob/...2/memcpy.c)

