/*************************************************************
* File: k4001.s
* Purpose: A serialICE kernel for the BDMR4001 eval board. This
* 	   kernel will also work when the board is fitted with
*	   a 4002, 4003, or 4101.
* Author: Phil Bunce (pjb@carmel.com)
* Revision History:
*	970117	Initial release
*	970121	Added ACR_COPY and IMR_COPY
*	970218	Added comments to 1st para.
*	970224	Added comments about downloading with RUN_APPLICATION
*	970303	Added entry symbol _start. (BSO)
*	970306	Changed name of previous _start to cstartup.
*	970306	Added IBS to save area
*	970709	Doubled IBUFSIZE for 4101 rom bpts w/ 8wd refill
*	970826	Added wakeup banner (ABCD).
*	970829	Removed call to CpuInit in cstartup.
*	980312	Switched to version2 savearea format
*
* This file contains the PROM resident code (the IceKernel) necessary to
* permit programs to be debugged using LSI Logic's SerialIce.  This
* implementation is for the BDMR4001 (the 4001/2/3 evaluation board).
* It uses channel A of the 2681 DUART to communicate with the
* ICEmonitor at 38400 baud.
*
* If you are using the 4003, you might also need a multiply/divide
* emulation package to support your C application.
*
* The code in this module executes in kseg1 (non cacheable), leaves
* BEV=1, and does not initialize the caches. If the switch
* RUN_APPLICATION is specified, the caches are flushed and BEV is set to
* zero so that the application runs cached (kseg0). However, the
* IceKernel always runs from kseg1.
*
* This module should be linked with .text section to start at 9fc00000,
* and with data to start at some out-of-the-way place in RAM. I suggest
* 80000100. That way the downloaded programs can start at 80000200 and
* still be able to copy an exception handler to 80000080 for use once BEV
* has been cleared. Example compile/link command for this file:
*
*     pmcc -crt0 -prom -syms -T 9fc00000 -D 80000100 -o k4001 k4001.s
*
* You can either link your application with this file and include it in
* the PROM, or download your application into RAM and execute it there.
*
* To include your application in the PROM you should enable the switch
* RUN_APPLICATION. This module will then call your program starting at
* the label 'main'. Note that if you do enable RUN_APPLICATION, then
* the clrbss routine of your application will almost certainly clear the
* savearea, and that will prevent you from downloading, because the
* ICEkernel will no longer be able to provide the address of the get_word
* routine.
*
* This file contains the following blocks of code:
*	reset_vector - The start of execution
*	utlb_vector  - UTLB exception handler
*	gen_vector   - Handler for all other exceptions
*	ice_loop     - Main loop of ICE
*	get_word     - Get one word from serial interface
*	put_word     - Put one word to serial interface
*	cpu_init     - Perform CPU-specific initialization
*
* The following routines are provided for a C application:
*	cstartup     - Prepare to execute a C application
*	getmachtype  - Returns the CPU type
*       ehandler     - The exception handler
*       flush_cache  - C callable routine to flush caches
*
* This module requires the following areas of RAM:
*	instr_buffer - This is where the instructions that have been
*		       received from the host are saved.
*	savearea     - This is where I save the context of the
*		       downloaded program.
*	
*/
/*#define RUN_APPLICATION	/* run application on power-up */

#include <mips.h>

/* Commands that are sent by the IceController.
 * SENDA0, RUN_MODE and SENDSAP are 32-bit words that ice_loop compares
 * against each word it receives. ATTN and ACK are single bytes
 * exchanged on the UART (see gen_vector, send_ack and get_cmd).
 */
#define SENDA0		0x12345678	/* execute instr_buffer, then send a0 */
#define RUN_MODE	0x87654321	/* run application */
#define SENDSAP		0xDEADBEEF	/* send savearea pointer */
#define ATTN		0x55		/* transfer control to IceKernel */
#define ACK		0xaa		/* reply to ATTN */

/* Offsets into the savearea.
 * All offsets are in words; the code multiplies them by 4 to get the
 * byte offset used in lw/sw.
 */
#define SA_VERS 2	/* value written to the ICE_SAV slot */
#define ICE_SAV	0	/* save area version */
#define ICE_SAH 1	/* save area header size */
#define ICE_MAP	2	/* bit-map for savearea */
#define ICE_IBS 3	/* size of instr buffer */
#define ICE_GWP 4	/* pointer to get_word routine */
#define ICE_PWP 5	/* pointer to put_word routine */
#define ICE_EPC	6	/* saved so that it can be set */
#define ICE_LE  7	/* set if little endian */
#define ICE_SAHSIZE 8	/* size of save area header */
/* end of header. The remainder is kernel-specific */
#define ICE_AT	(ICE_SAHSIZE+0)	/* at (assembler temporary) */
#define ICE_V0	(ICE_SAHSIZE+1)	/* v0 is used to hold the value received */
#define ICE_A0	(ICE_SAHSIZE+2)	/* a0 is used to hold the value to be sent */
#define ICE_A1	(ICE_SAHSIZE+3)	/* a1 is used as a temp */
#define ICE_A2	(ICE_SAHSIZE+4)	/* a2 (saved and restored by the kernel) */
#define ICE_A3	(ICE_SAHSIZE+5)	/* a3 (saved and restored by the kernel) */
#define ICE_T0	(ICE_SAHSIZE+6)	/* t0 is used by the host as a temp */
#define ICE_T1	(ICE_SAHSIZE+7)	/* t1 is used by the host as a temp */
#define ICE_T2	(ICE_SAHSIZE+8)	/* t2 temp */
#define ICE_T3	(ICE_SAHSIZE+9)	/* t3 temp */
#define ICE_T4	(ICE_SAHSIZE+10)	/* t4 temp */
#define ICE_S0  (ICE_SAHSIZE+11)	/* pointer to instr_buffer */
#define ICE_RA	(ICE_SAHSIZE+12)	/* ra is needed for bal/jal instrs */
#define ICE_SIZE (ICE_SAHSIZE+13)	/* total savearea size in words */
/* NOTE(review): REG_MAP appears to carry one bit per register number.
 * 0x80011ff6 has bits 1-2, 4-12, 16 and 31 set, which matches the
 * registers saved above (at, v0, a0-a3, t0-t4, s0, ra). Confirm against
 * the host-side savearea format before changing either list.
 */
#define REG_MAP	0x80011ff6	/* gp regs in savearea */


/* Channel A of the 2681 DUART is used for communication with
 * the IceController. It uses 38400 baud.
 *
 * The DUART registers are accessed with byte loads/stores at a stride
 * of 4 bytes from UART_BASE. The big-endian build uses a base that is
 * 3 bytes higher than the little-endian one.
 */
#ifdef MIPSEB
#define UART_BASE	0xbe000003
#else
#define UART_BASE	0xbe000000
#endif
#define UART_INTBIT	SR_INT0 /* DUART is connected to int0 */
#define UART_RXHR	(4*3)	/* rx holding reg, offset from UART_BASE */
#define UART_TXHR	(4*3)	/* tx holding reg, offset from UART_BASE */
#define UART_STATUS	(4*1) 	/* status reg, offset from UART_BASE */
/* The ready bits are tested by shifting the status byte left so that
 * the bit of interest lands in bit 31, then branching with bgez while
 * it is still clear. TXRDY is status bit 2, RXRDY is status bit 0.
 */
#define UART_TXRDY	(31-2)  /* shift amount to test bit */
#define UART_RXRDY	(31-0)  /* shift amount to test bit */
#define LR4001_RevA	/* enable the fix for 4001_RevA bug */

/*
 * I steal one word of the RAM vector area to hold copies of some
 * of the 2681's write-only registers. cpu_init stores the values it
 * programs into ACR and IMR at these (kseg1) byte addresses.
 */
#define ACR_COPY	0xa00000fc /* ptr to copy of acr reg */
#define IMR_COPY	0xa00000fd /* ptr to copy of imr reg */

#define J_RA_INSTR	0x03e00008	/* machine code for "j ra"; appended to instr_buffer by ice_loop */
#define IBUFSIZE	60		/* instr_buffer size in words (also reported in ICE_IBS) */

	/* RAM areas required by the kernel (sizes in bytes) */
	.comm instr_buffer,IBUFSIZE*4
	.comm savearea,ICE_SIZE*4

/*************************************************************
*  reset_vector:
*	This is where execution starts.
*	A maximum of 64 instructions allowed in this section
*	(utlb_vector follows at the next 256-byte boundary).
*
*	Sequence:
*	  1. cpu_init: configure the CPU, refresh timer and DUART.
*	  2. Clear CAUSE and put SR in a known state (BEV set) with
*	     every interrupt still masked.
*	  3. Fill in the savearea header through its kseg1 alias.
*	  4. Send the wakeup banner (not when RUN_APPLICATION).
*	  5. Enable the DUART interrupt, then either branch to
*	     cstartup or spin waiting for the host.
*
*	The interrupt is enabled last on purpose. gen_vector and
*	restore_rfe overwrite k0 and k1, and this code (and put_word)
*	keeps live values in them. If ATTN were accepted while the
*	header is being written, the remaining stores would go through
*	a clobbered k0 and the host could read an incomplete header.
*	A byte that arrives early stays pending in the DUART and
*	raises the interrupt as soon as it is unmasked.
*/
	.globl _start
_start:
reset_vector: # bfc00000
	bal	cpu_init

	# make sure the sw bits of the CAUSE register are zero
	.set noreorder
	mtc0	zero,C0_CAUSE		
	.set reorder

	# known SR state: BEV set, interrupts still disabled
	li	k0,SR_BEV
	.set noreorder
	mtc0	k0,C0_SR
	.set reorder

	# fill in the savearea header (k0 = kseg1 alias of savearea)
	la	k0,savearea
	li	k1,K1BASE
	or	k0,k1
	la	t0,get_word
	sw	t0,ICE_GWP*4(k0)
	la	t0,put_word
	sw	t0,ICE_PWP*4(k0)
	li	t0,IBUFSIZE
	sw	t0,ICE_IBS*4(k0)
	li	t0,REG_MAP
	sw	t0,ICE_MAP*4(k0)
	li	t0,SA_VERS
	sw	t0,ICE_SAV*4(k0)
	li	t0,ICE_SAHSIZE
	sw	t0,ICE_SAH*4(k0)
#ifdef MIPSEB
	li	t0,0
#else
	li	t0,1
#endif
	sw	t0,ICE_LE*4(k0)

#ifndef RUN_APPLICATION
	# wakeup banner; put_word sends the low byte first (A B C D)
	li	a0,0x44434241	# DCBA
	bal	put_word
#endif

	# enable ints in SR MASK+IEC
	# from here on k0 and k1 may change at any instruction
	li	k0,(SR_BEV|SR_IEC|UART_INTBIT)
	.set noreorder
	mtc0	k0,C0_SR
	.set reorder

#ifdef RUN_APPLICATION
	# execute a prom-resident application
	b	cstartup
#else
	# wait here for the host to speak to me
   1:	b	1b
#endif


/*************************************************************
* 		Start of interrupt-level code		     *
*************************************************************/
	.set noat

/*************************************************************
*  utlb_vector:
*	We should never get one of these. But just in case,
*	hand it to gen_vector like any other exception.
*	The .align 8 places this entry on a 256-byte boundary.
*/
	.align 8
utlb_vector: # bfc00100
	b	gen_vector

/*************************************************************
*  gen_vector:
*	All the exceptions come through here.
*
*	Steps:
*	  1. Jump to the kseg1 alias of this code.
*	  2. Save at, v0, a0-a3, t0-t4, s0, ra and EPC in the savearea.
*	     k0 and k1 are used as scratch and are NOT saved.
*	  3. Point s0 at the kseg1 alias of instr_buffer.
*	  4. Read CAUSE into a0 and decide what to do:
*	     - exception code non-zero (not a hw int) -> send_ack
*	     - hw int, but not the enabled UART int   -> send_ack
*	     - UART int, received byte is not ATTN    -> restore_rfe
*	     - UART int, received byte is ATTN        -> fall thru
*	       into send_ack
*
*	On the way into send_ack, a0 still holds the CAUSE value.
*/
	.align 7
	.globl gen_vector
	.ent gen_vector
gen_vector: # bfc00180
	# make sure that we are in kseg1
	la	k0,1f
	li	k1,K1BASE
	or	k0,k1
	j	k0
   1:
	# save regs (k0 = kseg1 alias of savearea)
	la	k0,savearea
	li	k1,K1BASE
	or	k0,k1
	sw	AT,ICE_AT*4(k0)
	sw	v0,ICE_V0*4(k0)
	sw	a0,ICE_A0*4(k0)
	sw	a1,ICE_A1*4(k0)
	sw	a2,ICE_A2*4(k0)
	sw	a3,ICE_A3*4(k0)
	sw	t0,ICE_T0*4(k0)
	sw	t1,ICE_T1*4(k0)
	sw	t2,ICE_T2*4(k0)
	sw	t3,ICE_T3*4(k0)
	sw	t4,ICE_T4*4(k0)
	sw	s0,ICE_S0*4(k0)
	sw	ra,ICE_RA*4(k0)
	# t0 has been saved, so it can carry EPC to the savearea
	.set noreorder
	mfc0	t0,C0_EPC
	nop
	.set reorder
	sw	t0,ICE_EPC*4(k0)

	# init s0 (KSEG1)
	la	s0,instr_buffer
	li	k0,K1BASE
	or	s0,k0

	# read the CAUSE register
	.set noreorder
	mfc0	a0,C0_CAUSE
	nop
	.set reorder

	# hw int?
	and	t0,a0,CAUSE_EXCMASK
	bne	t0,zero,send_ack	# brif not a hw int

	# It is a hw int. But is it my int?
	.set noreorder
	mfc0	t0,C0_SR
	nop
	.set reorder
	and	t0,a0		# qualify the CAUSE bits
	and	t0,UART_INTBIT
	beq	t0,zero,send_ack	# brif not mine

	# make sure that this is a *real* attn byte
	# read the byte
	li	k1,UART_BASE
	lbu	k0,UART_RXHR(k1)

	# compare through a register; AT is not available (noat)
	li	k1,ATTN
	bne	k0,k1,restore_rfe	# brif not an attn byte

	# fall thru
	.end gen_vector

/*************************************************************
*  send_ack:
*	Entered from gen_vector (by branch or by fall thru).
*	Waits until the UART transmitter is ready, sends one ACK
*	byte, zeroes t0 and t1, and falls thru into ice_loop.
*	Uses k0 and k1 as scratch.
*/
	.globl send_ack
	.ent send_ack
send_ack:
	li	k1,UART_BASE

	# make sure that the tx is ready
	# (shift the txrdy bit into bit 31 and loop while it is clear)
   1:	lbu	k0,UART_STATUS(k1)
	sll	k0,UART_TXRDY
	bgez	k0,1b

	li	k0,ACK
	sb	k0,UART_TXHR(k1)

	# make sure that r8 and r9 are zero.
	li	t0,0
	li	t1,0
	# fall thru
	.end send_ack
	
/*************************************************************
*  ice_loop:
*	This is the main loop. We get words and process them.
*	There are 3 special types of word. 
*		1. RUN_MODE - transfer control to the customer's program.
*		2. SENDSAP  - Send the address of the savearea
*		3. SENDA0   - Execute the code in instr_buffer and send
*		              the value of register a0.
*	All other values are added to the instr_buffer.
*
*	Register use:
*		s0     - next free word in instr_buffer (kseg1 alias)
*		v0     - word returned by get_cmd
*		k0, k1 - scratch
*
*	For SENDA0 the kernel appends three words at s0 (a zero word,
*	J_RA_INSTR, a zero word), so the buffered code returns here.
*	NOTE(review): nothing in this loop checks s0 against the end
*	of instr_buffer. The host is presumably expected to respect the
*	size published in ICE_IBS; confirm.
*
*	restore_rfe is also entered directly from gen_vector when a
*	UART interrupt delivers a byte that is not ATTN.
*/
	.globl ice_loop
	.ent ice_loop
ice_loop:
	bal	get_cmd

#if 0 /* echo the input. Useful for debug */
	move	a0,v0
	bal	put_word
	b	ice_loop
#endif
   	# check for SENDA0
	li	k1,SENDA0
	bne	k1,v0,1f

	# It is SENDA0. Execute the code in instr_buffer and send 
	# the value of register a0.
	# Make sure that the routine ends with a "j ra".
	sw	zero,(s0)
	li	k0,J_RA_INSTR
	sw	k0,4(s0)
	sw	zero,8(s0)
	# Make sure that the writes complete before the jal.
	.set noreorder
	nop
	nop
	nop
	.set reorder
	# Reset s0 to point to start of instr_buffer.
	la	s0,instr_buffer
	li	k0,K1BASE
	or	s0,k0
	jal	s0		# execute instr_buffer
	bal	put_word	# send A0
	b	ice_loop

   1:	# check for RUN_MODE
	li	k1,RUN_MODE
	bne	k1,v0,1f

restore_rfe:
	# It is RUN_MODE. Transfer control to the client.
	# restore regs
	la	k0,savearea
	li	k1,K1BASE
	or	k0,k1
	lw	AT,ICE_AT*4(k0)
	lw	v0,ICE_V0*4(k0)
	lw	a0,ICE_A0*4(k0)
	lw	a1,ICE_A1*4(k0)
	lw	a2,ICE_A2*4(k0)
	lw	a3,ICE_A3*4(k0)
	lw	t0,ICE_T0*4(k0)
	lw	t1,ICE_T1*4(k0)
	lw	t2,ICE_T2*4(k0)
	lw	t3,ICE_T3*4(k0)
	lw	t4,ICE_T4*4(k0)
	lw	s0,ICE_S0*4(k0)
	lw	ra,ICE_RA*4(k0)
	# k0 is reloaded with the saved EPC; the client resumes with
	# k0 = EPC and k1 = K1BASE rather than their old values
	.set noreorder
	lw	k0,ICE_EPC*4(k0)
	nop
	j	k0		# jump to client
	rfe
	.set reorder

   1:	# check for SENDSAP
	li	k1,SENDSAP
	bne	k1,v0,1f

	# It is SENDSAP. Send address of savearea.
	la	a0,savearea
	or	a0,1		# indicate new format
	bal	put_word
	b	ice_loop

   1:	# else. Not a special word.
	sw	v0,(s0)		# save word in instr_buffer
	addu	s0,4		# ready for next word
	b	ice_loop
	.end ice_loop

	.set at

/*************************************************************
*  get_cmd()
*	Build one 32-bit word from four bytes read from the serial
*	interface. Each new byte is shifted in at the low end, so the
*	first byte received ends up most significant. The result goes
*	in v0.
*
*	Unlike get_word, an ATTN byte arriving as the FIRST byte of
*	a word is treated as a wakeup: it is answered with ACK and
*	dropped, and the word starts over.
*
*	Scratch: k0, k1, a1 (and AT, through the immediate compares).
*/
	.globl get_cmd
	.ent get_cmd
get_cmd:
	li	k1,UART_BASE		# k1 = DUART base
	li	a1,4			# a1 = bytes still to collect

	# poll the status reg until the receiver holds a byte
   1:	lbu	k0,UART_STATUS(k1)
	sll	k0,k0,UART_RXRDY	# rxrdy bit -> bit 31
	bgez	k0,1b

	lbu	k0,UART_RXHR(k1)	# k0 = received byte

	# a wakeup can only be the first byte of a word
	bne	a1,4,3f			# not the first byte
	bne	k0,ATTN,3f		# first byte, but not a wakeup

	# wakeup: poll until the transmitter is free
   2:	lbu	k0,UART_STATUS(k1)
	sll	k0,k0,UART_TXRDY	# txrdy bit -> bit 31
	bgez	k0,2b

	# answer with ACK and go back to waiting for the first byte
	li	k0,ACK
	sb	k0,UART_TXHR(k1)
	b	1b

	# ordinary data byte: shift it into the low end of v0
   3:	sll	v0,v0,8
	or	v0,v0,k0
	subu	a1,a1,1			# one byte fewer to go
	bnez	a1,1b

	jr	ra
	.end get_cmd

/*************************************************************
*  get_word()
*	Build one 32-bit word from four bytes read from the serial
*	interface. Each new byte is shifted in at the low end, so the
*	first byte received ends up most significant. The result goes
*	in v0. No byte value is treated specially here.
*
*	The address of this routine is published in the savearea
*	(ICE_GWP slot) by reset_vector.
*
*	Scratch: k0, k1, a1.
*/
	.globl get_word
	.ent get_word
get_word:
	li	k1,UART_BASE		# k1 = DUART base
	li	a1,4			# a1 = bytes still to collect

	# poll the status reg until the receiver holds a byte
   1:	lbu	k0,UART_STATUS(k1)
	sll	k0,k0,UART_RXRDY	# rxrdy bit -> bit 31
	bgez	k0,1b

	lbu	k0,UART_RXHR(k1)	# k0 = received byte
	sll	v0,v0,8			# make room at the low end
	or	v0,v0,k0		# drop the byte in
	subu	a1,a1,1			# one byte fewer to go
	bnez	a1,1b

	jr	ra
	.end get_word

/*************************************************************
*  put_word()
*	Send the 32-bit word in a0 to the serial interface as four
*	bytes, least significant byte first.
*
*	a0 is consumed: it is shifted right by 8 after every byte.
*	The address of this routine is published in the savearea
*	(ICE_PWP slot) by reset_vector.
*
*	Scratch: k0, k1, a1.
*/

	.globl put_word
	.ent put_word
put_word:
	li	k1,UART_BASE		# k1 = DUART base
	li	a1,4			# a1 = bytes still to send

	# poll the status reg until the transmitter is free
   1:	lbu	k0,UART_STATUS(k1)
	sll	k0,k0,UART_TXRDY	# txrdy bit -> bit 31
	bgez	k0,1b

	sb	a0,UART_TXHR(k1)	# send the current low byte
	srl	a0,a0,8			# bring the next byte down
	subu	a1,a1,1			# one byte fewer to go
	bnez	a1,1b

	jr	ra
	.end put_word

/*************************************************************
* 		End of interrupt-level code		     *
*************************************************************/
	.set at


/*************************************************************
*  cpu_init()
*	This is where the CPU-specific init code lives.
*	This implementation is for the bdmr4001 (4001 eval board).
*	This example is designed to use ChanA of the DUART for connection
*	to the IceController.
*
*	Called with bal from reset_vector; returns with j ra.
*	Uses t0-t3, a0 and a1 and makes no calls.
*
*	Steps:
*	  1. Write the CFG4001 register, temporarily routing the
*	     CpCond inputs to the interrupt inputs so that the board
*	     jumpers can be read from CAUSE and merged into the
*	     configuration value.
*	  2. Program the refresh timer (timer0), using the RevA
*	     workaround sequence when LR4001_RevA is defined.
*	  3. Reset and program channel A of the DUART, keeping copies
*	     of the ACR and IMR values at ACR_COPY and IMR_COPY.
*/
	.globl cpu_init
	.ent cpu_init
cpu_init:

        # set M_CFG4001
        # wben, ~tlben, ~dberr, pgsz=111, rdpri, cmode=00, dcen, is1en, icen
        # isize=int5,int4 dsize=int3,int2
        li      t0,(CFG_WBEN|CFG_DCEN|CFG_IS1EN|CFG_ICEN)
        #or     t0,(CFG_PGSZ_2K|CFG_CMODE_NORM|CFG_DSNOOP|CFG_ISNOOP)
        or      t0,(CFG_PGSZ_2K|CFG_CMODE_NORM)

        # setting of DBS0/1 and IBS0/1 is controlled by jumpers on the
        # board that are connected to the CpCond inputs. But rather than
        # use a whole bunch of bc1t instructions to test them. I connect
        # them to the interrupt inputs and then test the CAUSE register.

        # connect CpCond inputs to interrupt inputs
        or      t0,(CFG_CPC0EN|CFG_CPC1EN|CFG_CPC2EN|CFG_CPC3EN)
        li      t1,M_CFG4001
        sw      t0,(t1)         # write CFG4001
        lw      zero,(t1)       # flush wb

        .set noreorder
        # allow time for CFG change to take effect
        nop
        nop
        mfc0    t1,C0_CAUSE
        nop
        .set reorder
        # move the int5/int4 and int3/int2 bits of CAUSE into the
        # configuration value held in t0
        and     t2,t1,(CAUSE_INT5|CAUSE_INT4)
        srl     t2,14-2
        or      t0,t2
        and     t2,t1,(CAUSE_INT3|CAUSE_INT2)
        srl     t2,12-5
        or      t0,t2
        li      t1,M_CFG4001
        sw      t0,(t1)
         
        # disconnect cpCond inputs from interrupt inputs
        and     t0,~(CFG_CPC0EN|CFG_CPC1EN|CFG_CPC2EN|CFG_CPC3EN)
        li      t1,M_CFG4001
        sw      t0,(t1)         # write CFG4001
        lw      zero,(t1)       # flush wb

#ifdef LR4001_RevA
	# RevA has a problem when accessing the timers. Use this
	# sequence for reliable operation.
        # set refresh timer (timer0)
        # 60MHz clock 16ms/1024 = 941
        # Flush Write Buffer & Update config to turnoff I-caches
        # t1 keeps the original config value so it can be put back below
        .set noreorder
        li      t0, 0xbfff0000  #Address of Configuration Register
        lw      t1, (t0)        #Determine ConfigReg value
        nop
        li      t3, 0xfffffffc
        and     t2, t1, t3      # Mask out Icache enables
        sw      t2, (t0)        #Store it back out
        lw      zero, (t0)      #Loading it 2nd time creates dependency
        nop                     #Along with this nop
         
        nop
        nop
        nop
        nop
        nop
        nop
        nop
        nop
         
        li      a0,M_TMR4001
        li      a1,941
        sw      a1,O_TIC0(a0)   # Actual sw to Timer.
        lw      zero,O_TIC0(a0) # flush wb
         
        li      a1,(TMODE_EN0|TMODE_PULSE0)
        sw      a1,O_TMODE(a0)  # write to tmode reg
        lw      zero,O_TMODE(a0)        # flush wb
         
        sw      t1, (t0)        # Return config register to original value.
        nop
        .set reorder
#else    
        # set refresh timer (timer0)
        # 60MHz clock 16ms/1024 = 941
        li      t1,M_TMR4001
        li      t0,941
        sw      t0,O_TIC0(t1)           # set timer initial count
        li      t0,(TMODE_EN0|TMODE_PULSE0)
        sw      t0,O_TMODE(t1)          # write to tmode reg
        lw      zero,O_TMODE(t1)        # flush wb
#endif   
 
	# initialize chan A of the DUART
	# (in the notes below the first number is the register index;
	# registers sit 4 bytes apart starting at UART_BASE)
	li	t2,UART_BASE
	# 5 = 0 mask off all ints
	sb	zero,5*4(t2)
	# 2 = 0a disable rx & tx
	li	t0,0x0a
	sb	t0,2*4(t2)
	# 2 = 10 reset MR reg
	li	t0,0x10
	sb	t0,2*4(t2)
	# 2 = 20 reset rx
	li	t0,0x20
	sb	t0,2*4(t2)
	# 2 = 30 reset tx
	li	t0,0x30
	sb	t0,2*4(t2)
	# 0 = 13 no parity, 8 bits data
	li	t0,0x13
	sb	t0,0*4(t2)
	# 0 = 0f 2 stop bits
	li	t0,0x0f
	sb	t0,0*4(t2)
	# 4 = ACR = 0
	li	t0,0
	sb	t0,4*4(t2)
	li	t1,ACR_COPY
	sb	t0,(t1)
	# 1 = cc 38400
	li	t0,0xcc
	sb	t0,1*4(t2)
	# register 4 (ACR) is written with zero a second time here
	sb	zero,4*4(t2)
	# 2 = 05 enable rx & tx
	li	t0,0x05
	sb	t0,2*4(t2)
	# 5 = 02 enable rx ints
	li	t0,0x02
	sb	t0,5*4(t2)
	li	t1,IMR_COPY
	sb	t0,(t1)

	 j	ra
	.end cpu_init


#ifdef RUN_APPLICATION

#define RAM_GENVECT	0x80000080	/* where ehandler is copied to */
#define STACKSIZE	(8*1024)	/* bytes reserved for the stack */
	.comm	stack,STACKSIZE
	.comm   flush_cache_ptr,4	/* address used by flush_cache */

/*************************************************************
*  cstartup:
*	Prepare to execute a C application and call it. Reached by
*	a branch from reset_vector when RUN_APPLICATION is defined.
*
*	Steps:
*	  1. Call r4001_flush (through its kseg1 alias) for both
*	     caches.
*	  2. Copy initialized data to RAM (cpdata) and clear bss
*	     (clrbss).
*	  3. Set gp, save the flush routine address in
*	     flush_cache_ptr, and set sp near the top of 'stack'.
*	  4. Copy ehandler to RAM_GENVECT, flush the Icache and
*	     clear BEV.
*	  5. Call main. If main returns, spin forever at _exit.
*
*	r4001_flush, cpdata, clrbss, _gp and main are defined outside
*	this file. s0 must survive the calls to cpdata and clrbss,
*	because it is stored to flush_cache_ptr afterwards.
*/
	.globl _exit
	.globl cstartup
	.ent cstartup
cstartup:
        # flush both caches: call r4001_flush through its kseg1
        # alias, once with ICACHEI and once with DCACHEI
	la	s0,r4001_flush
        or      s0,K1BASE
        li      a0,ICACHEI
        jal     s0
        li      a0,DCACHEI
        jal     s0

#if 1	/* necessary for ROM-resident code */
	# copy the data to RAM
	li	a0,FDATA
	jal	cpdata
#endif

	# note that clrbss must not use a3
	# (stored after clrbss, so the pointer saved below survives)
	jal	clrbss

	# set the global data pointer
	la	gp,_gp

	# save the address of the cache flush routine
	sw	s0,flush_cache_ptr

	# set sp
	la	sp,stack+STACKSIZE-24

#if 1	/* use the RAM-resident exception vectors */
	# copy exception handler to RAM vector
	# word by word from ehandler up to (not including) ehandler_end
	la	t1,ehandler
	la	t2,ehandler_end
	li	t3,RAM_GENVECT	
   1:	lw	t0,(t1)
	sw	t0,(t3)
	addu	t1,4
	addu	t3,4
	bne	t1,t2,1b

	# flush the Icache
        li      a0,ICACHE
        jal     flush_cache

	# clear BEV
	.set noreorder
	mfc0	t0,C0_SR
	nop
	and	t0,~SR_BEV
	mtc0	t0,C0_SR
	.set reorder
#endif

 	# call the main C routine
	la	t0,main
	jal	t0
_exit:
	# main returned (or _exit was called): hang here
	b	_exit
	.end cstartup


/*************************************************************
*  getmachtype()
*	C-callable. Reports the CPU type this kernel was built
*	for: always returns the constant 4001 in v0.
*	Takes no arguments and touches no other register.
*/
	.globl getmachtype
	.ent getmachtype
getmachtype:
	li	v0,4001			# v0 = CPU type
	jr	ra
	.end getmachtype


/*************************************************************
*  ehandler:
*	This is the exception handler that gets copied to RAM.
*	If the application uses exceptions, it will use this
*	code to transfer control to the IceKernel for all 
*	non-application exceptions.
*
*	cstartup copies the words from ehandler up to (not including)
*	ehandler_end to RAM_GENVECT. The code loads the absolute
*	address of gen_vector into k0 and jumps there.
*	NOTE(review): this relies on the assembler (reorder mode)
*	emitting the delay-slot filler for the jump before
*	ehandler_end, so that it is copied too; confirm in a listing.
*/
	.ent ehandler
ehandler:
	la	k0,gen_vector
	j	k0
ehandler_end:
	.end ehandler

/*************************************************************
*  flush_cache(type)
*	A C-callable routine to flush the caches.
*	Jumps (without linking) to the routine whose address
*	cstartup stored in flush_cache_ptr. The 'type' argument in
*	a0 is passed through untouched, and that routine returns
*	straight to our caller through ra.
*	Scratch: t0.
*/
	.globl flush_cache
	.ent flush_cache
flush_cache:
	lw	t0,flush_cache_ptr	# t0 = saved flush routine address
	jr	t0
	.end flush_cache

#endif /*  RUN_APPLICATION */
