/*************************************************************
 * File: lib/bcopy.s
 * Purpose: bcopy() memory copy routine (MIPS assembly); part of the C runtime library
 */

#include <regdef.h>

	.globl	bcopy
	.ent	bcopy
bcopy:
	.set noreorder
	/*
	 * bcopy(src, dst, bytes) - copy `bytes` bytes from src to dst.
	 *
	 *   a0 = src, a1 = dst, a2 = byte count (all three are modified)
	 *
	 * The copy runs forwards when src > dst (unsigned compare) and
	 * backwards from the end of both buffers otherwise, so that with
	 * overlapping buffers a store never lands on a source byte that has
	 * not been loaded yet.
	 *
	 * The low two bits of (src ^ dst) select the widest access used:
	 *   0    -> words      (single bytes first, until src is word aligned)
	 *   2    -> half words (one leading byte first if src is odd)
	 *   1, 3 -> bytes
	 * Each main loop moves 16 bytes per pass; whatever is left (fewer
	 * than 16 bytes) is finished one byte at a time in finifwd/finibkwd.
	 *
	 * Registers: t1-t4 hold data, t7 = 1, t8 = 3, t9 = 16.  bgtu and
	 * beq-with-immediate are assembler macros that expand to more than
	 * one instruction (presumably via the assembler temporary).  No
	 * return value is produced.
	 *
	 * Under .set noreorder the instruction after each branch is its delay
	 * slot and executes whether or not the branch is taken.  Therefore no
	 * load is placed in the delay slot of a "remaining count" test: it
	 * would read memory just outside the source buffer whenever the
	 * count is smaller than the access size.
	 */

	beqz	a2, done			# nothing to copy
	li		t8, 3				# (delay slot) t8 = word alignment mask

	li		t9, 16				# t9 = bytes moved per unrolled pass
	li		t7, 1				# t7 = byte step and half word alignment mask
	xor		t1, a0, a1
	bgtu	a0, a1, copyfwd		# if src gt dst, copy forward
	and		t1, t8				# (delay slot) t1 = relative alignment of src/dst

	addu	a0, a2				# copy from end of block backwards
	addu	a1, a2

	beqz	t1, bkwdword		# src/dst can both be word aligned
	and		t2, a0, t8			# (delay slot) t2 = offset of src end within a word

	beq		t1, 2, bkwdhalf		# src/dst can both be half word aligned
	and		t2, a0, t7			# (delay slot) t2 = 1 if src end is odd

1:	# copy byte by byte, 16 bytes per pass

	bgtu	t9, a2, finibkwd	# less than 16 bytes to copy
	nop							# (delay slot) no load: count may be too small

	lb		t1, -1(a0)
	lb		t2, -2(a0)
	lb		t3, -3(a0)
	lb		t4, -4(a0)
	sb		t1, -1(a1)
	sb		t2, -2(a1)
	sb		t3, -3(a1)
	sb		t4, -4(a1)

	lb		t1, -5(a0)
	lb		t2, -6(a0)
	lb		t3, -7(a0)
	lb		t4, -8(a0)
	sb		t1, -5(a1)
	sb		t2, -6(a1)
	sb		t3, -7(a1)
	sb		t4, -8(a1)

	lb		t1, -9(a0)
	lb		t2, -10(a0)
	lb		t3, -11(a0)
	lb		t4, -12(a0)
	sb		t1, -9(a1)
	sb		t2, -10(a1)
	sb		t3, -11(a1)
	sb		t4, -12(a1)

	lb		t1, -13(a0)
	lb		t2, -14(a0)
	lb		t3, -15(a0)
	lb		t4, -16(a0)
	sb		t1, -13(a1)
	sb		t2, -14(a1)
	sb		t3, -15(a1)
	sb		t4, -16(a1)

	subu	a2, t9
	subu	a0, t9
	b		1b
	subu	a1, t9				# (delay slot)

finibkwd:						# backward tail: one byte per pass
	beqz	a2, done
	subu	a2, t7				# (delay slot) a2 is not used again at done
	lb		t1, -1(a0)
	subu	a0, t7
	subu	a1, t7
	b		finibkwd
	sb		t1, 0(a1)			# (delay slot) a1 has already been stepped back

bkwdhalf:
	beqz	t2, 1f				# src end even: can start with half word copy
	lb		t1, -1(a0)			# (delay slot) in bounds, a2 is at least 1 here
	subu	a2, t7
	sb		t1, -1(a1)
	subu	a0, t7
	subu	a1, t7

1:	# copy half word by half word, 16 bytes per pass

	bgtu	t9, a2, finibkwd	# less than 16 bytes to copy
	nop							# (delay slot) no load: count may be too small

	lh		t1, -2(a0)
	lh		t2, -4(a0)
	lh		t3, -6(a0)
	lh		t4, -8(a0)
	sh		t1, -2(a1)
	sh		t2, -4(a1)
	sh		t3, -6(a1)
	sh		t4, -8(a1)

	lh		t1, -10(a0)
	lh		t2, -12(a0)
	lh		t3, -14(a0)
	lh		t4, -16(a0)
	sh		t1, -10(a1)
	sh		t2, -12(a1)
	sh		t3, -14(a1)
	sh		t4, -16(a1)

	subu	a2, t9
	subu	a0, t9
	b		1b
	subu	a1, t9				# (delay slot)

bkwdword:
	beqz	t2, 1f				# src end word aligned: can start with word copy
	lb		t1, -1(a0)			# (delay slot) in bounds, a2 is at least 1 here

	subu	a2, t7
	subu	a0, t7
	sb		t1, -1(a1)
	beqz	a2, done
	and		t2, a0, t8			# (delay slot) recompute alignment of new src end

	b		bkwdword
	subu	a1, t7				# (delay slot)

1:	# copy word by word, 16 bytes per pass

	bgtu	t9, a2, finibkwd	# less than 16 bytes to copy
	nop							# (delay slot) no load: count may be too small

	lw		t1, -4(a0)
	lw		t2, -8(a0)
	lw		t3, -12(a0)
	lw		t4, -16(a0)

	sw		t1, -4(a1)
	sw		t2, -8(a1)
	sw		t3, -12(a1)
	sw		t4, -16(a1)

	subu	a2, t9
	subu	a0, t9
	b		1b
	subu	a1, t9				# (delay slot)

copyfwd:
	beqz	t1, fwdword			# src/dst can both be word aligned
	and		t2, a0, t8			# (delay slot) t2 = offset of src within a word

	beq		t1, 2, fwdhalf		# src/dst can both be half word aligned
	and		t2, a0, t7			# (delay slot) t2 = 1 if src is odd

1:	# copy byte by byte, 16 bytes per pass

	bgtu	t9, a2, finifwd		# less than 16 bytes to copy
	nop							# (delay slot) no load: count may be too small

	lb		t1, 0(a0)
	lb		t2, 1(a0)
	lb		t3, 2(a0)
	lb		t4, 3(a0)
	sb		t1, 0(a1)
	sb		t2, 1(a1)
	sb		t3, 2(a1)
	sb		t4, 3(a1)

	lb		t1, 4(a0)
	lb		t2, 5(a0)
	lb		t3, 6(a0)
	lb		t4, 7(a0)
	sb		t1, 4(a1)
	sb		t2, 5(a1)
	sb		t3, 6(a1)
	sb		t4, 7(a1)

	lb		t1, 8(a0)
	lb		t2, 9(a0)
	lb		t3, 10(a0)
	lb		t4, 11(a0)
	sb		t1, 8(a1)
	sb		t2, 9(a1)
	sb		t3, 10(a1)
	sb		t4, 11(a1)

	lb		t1, 12(a0)
	lb		t2, 13(a0)
	lb		t3, 14(a0)
	lb		t4, 15(a0)
	sb		t1, 12(a1)
	sb		t2, 13(a1)
	sb		t3, 14(a1)
	sb		t4, 15(a1)

	subu	a2, t9
	addu	a0, t9
	b		1b
	addu	a1, t9				# (delay slot)

finifwd:						# forward tail: one byte per pass
	beqz	a2, done
	subu	a2, t7				# (delay slot) a2 is not used again at done
	lb		t1, 0(a0)
	addu	a0, t7
	addu	a1, t7
	b		finifwd
	sb		t1, -1(a1)			# (delay slot) a1 has already been advanced

fwdhalf:
	beqz	t2, 1f				# src even: can start with half word copy
	lb		t1, 0(a0)			# (delay slot) in bounds, a2 is at least 1 here
	subu	a2, t7
	sb		t1, 0(a1)
	addu	a0, t7
	addu	a1, t7

1:	# copy half word by half word, 16 bytes per pass

	bgtu	t9, a2, finifwd		# less than 16 bytes to copy
	nop							# (delay slot) no load: count may be too small

	lh		t1, 0(a0)
	lh		t2, 2(a0)
	lh		t3, 4(a0)
	lh		t4, 6(a0)
	sh		t1, 0(a1)
	sh		t2, 2(a1)
	sh		t3, 4(a1)
	sh		t4, 6(a1)

	lh		t1, 8(a0)
	lh		t2, 10(a0)
	lh		t3, 12(a0)
	lh		t4, 14(a0)
	sh		t1, 8(a1)
	sh		t2, 10(a1)
	sh		t3, 12(a1)
	sh		t4, 14(a1)

	subu	a2, t9
	addu	a0, t9
	b		1b
	addu	a1, t9				# (delay slot)

fwdword:
	beqz	t2, 1f				# src word aligned: can start with word copy
	lb		t1, 0(a0)			# (delay slot) in bounds, a2 is at least 1 here

	subu	a2, t7
	addu	a0, t7
	sb		t1, 0(a1)
	beqz	a2, done
	and		t2, a0, t8			# (delay slot) recompute alignment of new src

	b		fwdword
	addu	a1, t7				# (delay slot)

1:	# copy word by word, 16 bytes per pass

	bgtu	t9, a2, finifwd		# less than 16 bytes to copy
	nop							# (delay slot) no load: count may be too small

	lw		t1, 0(a0)
	lw		t2, 4(a0)
	lw		t3, 8(a0)
	lw		t4, 12(a0)

	sw		t1, 0(a1)
	sw		t2, 4(a1)
	sw		t3, 8(a1)
	sw		t4, 12(a1)

	subu	a2, t9
	addu	a0, t9
	b		1b
	addu	a1, t9				# (delay slot)

done:
	j		ra
	nop							# (delay slot)

	.set reorder
	.end	bcopy
