/* #define DEBUG_INTERFACE */ /* -*-Midas-*- */
 ###
 ### $Id: mips.m4,v 1.15 1999/01/02 06:11:34 cph Exp $
 ###
 ### Copyright (c) 1989-1999 Massachusetts Institute of Technology
 ###
 ### This program is free software; you can redistribute it and/or
 ### modify it under the terms of the GNU General Public License as
 ### published by the Free Software Foundation; either version 2 of
 ### the License, or (at your option) any later version.
 ###
 ### This program is distributed in the hope that it will be useful,
 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ### General Public License for more details.
 ###
 ### You should have received a copy of the GNU General Public License
 ### along with this program; if not, write to the Free Software
 ### Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 ###

 #### MIPS Architecture assembly language part of the compiled
 #### code interface. See cmpint.txt, cmpint.c, cmpint-mips.h, and
 #### cmpgc.h for more documentation.
 ####
 #### NOTE:
 ####	Assumptions:
 ####
 ####	1) All registers (except double floating point registers) and
 ####	stack locations hold a C long object.
 ####
 ####	2) The C compiler divides registers into three groups:
 ####	- Linkage registers, used for procedure calls and global
 ####	references.  On MIPS: 0 (always 0), 31 (return address),
 ####	28 (global data pointer), and 29 (C stack pointer).
 ####	- super temporaries, not preserved accross procedure calls and
 ####	always usable. On MIPS: 1-15, and 24-25.
 ####	  4-7 are argument registers,
 ####	  2 and 3 are return registers.
 ####	- preserved registers saved by the callee if they are written.
 ####	On MIPS: 16-23, and 30.
 ####
 ####	3) Arguments, if passed on a stack, are popped by the caller
 ####	or by the procedure return instruction (as on the VAX).  Thus
 ####	most "leaf" procedures need not worry about them. On MIPS: All
 ####	arguments have slots in the stack, allocated and popped by the
 ####	caller, but the first four words are actually passed in
 ####	registers 4 through 7, unless they are floating point
 ####	arguments, in which case they are passed in floating point
 ####	registers.
 ####
 ####	4) There is a hardware or software maintained stack for
 ####	control.  The procedure calling sequence may leave return
 ####	addresses in registers, but they must be saved somewhere for
 ####	nested calls and recursive procedures.  On MIPS: Passed in a
 ####	register, but a slot on the stack exists, allocated by the
 ####	caller.  The return link is in 31.  The stack pointer is in
 ####	29.
 ####
 ####	5) C procedures return long values in a super temporary
 ####   register.  MIPS only: two word structures are returned in a
 ####   location specified by the contents of the first argument
 ####   register, and all other arguments are shifted over one
 ####   location (i.e.  apparent argument 1 is passed in the register
 ####   usually used for argument 2, etc.)
 ####
 ####	6) On MIPS the floating point registers fr20-fr31 are
 ####	callee-saves registers, fr12-fr15 are parameter registers, and
 ####	fr4-fr11 and fr16-fr19 are caller-saves registers.  fr0-3 are
 ####	return result registers.  Only the even numbered registers are
 ####	used (odd registers contain second 32 bits of 64 bit values).

 #### Compiled Scheme code uses the following register convention.
 #### Note that scheme_to_interface and the register block are
 #### preserved by C calls, but the others are not, since they change
 #### dynamically.  scheme_to_interface_linked and
 #### trampoline_to_interface can be reached at fixed offsets from
 #### scheme_to_interface.
 ####	- gr1  is the assembler temporary.
 ####   - gr2  is the returned value register.
 ####	- gr3  contains the Scheme stack pointer.
 ####   - gr4 - gr7 are used by C for passing arguments.
 ####	- gr8  contains a cached version of MemTop.
 ####	- gr9  contains the Scheme free pointer.
 ####	- gr10 contains the address of scheme_to_interface.
 ####	- gr11 contains the dynamic link when needed.
 ####	- gr12 - gr15 have no special uses.
 ####   <CALLEE SAVES REGISTERS BELOW HERE>
 ####   - gr16 - gr18 have no special uses.
 ####   - gr19 contains the closure free pointer.
 ####	- gr20 contains the address mask for machine pointers.
 ####	- gr21 contains a pointer to the Scheme interpreter's
 ####	       "register" block.  This block contains the compiler's
 ####          copy of MemTop, the interpreter's registers (val, env,
 ####          exp, etc), temporary locations for compiled code.
 ####   - gr22 contains the top 6 address bits for heap pointers.
 ####	- gr23 contains the closure hook.
 ####   <CALLEE SAVES REGISTERS ABOVE HERE>
 ####	- gr24 has no special use.
 ####   - gr25 is used a an index for dispatch into C.
 ####   - gr26 and 27 are reserved for the OS.
 ####   - gr28 contains the pointer to C static variables.
 ####   - gr29 contains the C stack pointer.
 ####   <CALLEE SAVES REGISTERS BELOW HERE>
 ####   - gr30 has no special use.
 ####   <CALLEE SAVES REGISTERS ABOVE HERE>
 ####	- gr31 is used for linkage (JALR, JAL, BGEZAL, and BLTZAL write it).
 ####
 ####	All other registers are available to the compiler.  A
 ####	caller-saves convention is used, so the registers need not be
 ####	preserved by subprocedures.
 ####
 ####   Notice that register gr25 is used for the index used to
 ####   dispatch into the trampolines and interface routines.

 #	.verstamp	1 31
	.text	
	.align	2
	.set	noat
	.set	noreorder

 # This is required to work around a bug in the IRIX 6.3 assembler.
 # The bug caused an incorrect reference to be generated in the
 # "la	$closure_reg,closure_hook" instruction.
	.globl	closure_hook

define(value, 2)
define(stack, 3)
define(C_arg1, 4)
define(C_arg2, 5)
define(C_arg3, 6)
define(C_arg4, 7)
define(memtop, 8)
define(free, 9)
define(s_to_i, 10)
define(dynlink, 11)

define(closure_free, 19)
define(addr_mask, 20)
define(registers, 21)
define(heap_bits, 22)
define(closure_reg, 23)
	
define(tramp_index, 25)

define(TC_ENTITY, 0x10)
define(TC_FIXNUM, 0x1A)
define(TC_CCENTRY, 0x28)

 # Argument (in $C_arg1) is a compiled Scheme entry point
 # but save C registers first
	.globl	C_to_interface
	.ent	C_to_interface
C_to_interface:
	addi	$sp,$sp,-120
	.frame	$sp,120,$0
	.mask	0x80ff0000,0
	sw	$31,116($sp)		# Save return address
	sw	$30,112($sp)
	sw	$23,108($sp)
	sw	$22,104($sp)
	sw	$21,100($sp)
	sw	$20,96($sp)
	sw	$19,92($sp)
	sw	$18,88($sp)
	sw	$17,84($sp)
	sw	$16,80($sp)
	.fmask	0x00000fff,0
	s.d	$f30,72($sp)
	s.d	$f28,64($sp)
	s.d	$f26,56($sp)
	s.d	$f24,48($sp)
	s.d	$f22,40($sp)
	s.d	$f20,32($sp)
	# 20 and 24($sp) hold return data structure from C hooks
        # 16 is reserved for 4th argument to hooks, if used.
        # 4, 8, and 12($sp) are space for 1st - 3rd argument.
        # 0($sp) is space for holding return pointer
#ifdef DEBUG_INTERFACE
	la	$registers,Debug_Buffer
	.set	at
	sw	$registers,Debug_Buffer_Pointer
	.set	noat
#endif
	la	$registers,Registers
	lw	$heap_bits,Free
	lui	$addr_mask,0xfc00
	and	$heap_bits,$heap_bits,$addr_mask
	nor	$addr_mask,$0,$addr_mask
	la	$closure_reg,closure_hook
	lw	$closure_free,36($registers)
 # ... fall through ...
 # Argument (in $C_arg1) is a compiled Scheme entry point.  Reload
 # the Scheme registers and go to work...any registers not reloaded
 # here must be callee saves by C.
	.globl	interface_to_scheme
interface_to_scheme:
	lw	$value,8($registers)
	lw	$memtop,0($registers)
	lw	$stack,Ext_Stack_Pointer
	lw	$free,Free
	and	$dynlink,$addr_mask,$value
	or	$dynlink,$heap_bits,$dynlink
#ifdef DEBUG_INTERFACE
	andi	$at,$free,3
	bne	$at,0,Bad_Free_Pointer
	nop
Continue_Past_Free_Problem:
#endif
	jal	$31,$C_arg1		# Off to compiled code ...
        addi	$s_to_i,$31,100		# Set up scheme_to_interface

	.globl	hook_jump_table
hook_jump_table:
 # This sequence of NOPs is provided to allow for modification of
 # the sequence that appears above without having to recompile the
 # world.  The compiler "knows" the distance between
 # scheme_to_interface_ble and hook_jump_table (100 bytes)
 #
 # $tramp_index has the offset into the table that is desired.
	.globl	link_to_interface
link_to_interface: 	# ...scheme_to_interface-100
	addi	$31,$31,4		# Skip over format word ...

	.globl	trampoline_to_interface
trampoline_to_interface:		# ...scheme_to_interface-96
	j	scheme_to_interface
	add	$C_arg2,$0,$31		# Arg2 <- trampoline data area
	
	break	1			# ...-88 Used to be generate_closure
	nop				# ...-84

	break	2			# ...-80 Used to be push_closure_entry
	nop				# ...-76

	j	cons_closure		# -72
	lw	$7,40($registers)	# closure limit -68

	j	cons_multi		# -64
	lw	$7,40($registers)	# closure limit -60

	j	shortcircuit_apply	# ...-56
	lw	$C_arg2,0($stack)	# procedure -52

	j	set_interrupt_enables	# ...-48
	lw	$value,4($registers)	# ...-44

	nop				# ...-40
	nop				# ...-36
	nop				# ...-32
	nop				# ...-28
	nop				# ...-24
	nop				# ...-20
	nop				# ...-16
	nop				# ...-12
	nop				# ...-8
	nop				# ...-4

 # DO NOT MOVE the following label, it is used above ...
 #  Argument (in $tramp_index) is index into utility_table for the
 # interface procedure to be called.  The Scheme compiler has saved
 # any registers that it may need.  Registers 5 through 7 are loaded
 # with arguments for the C procedure that is being invoked.  The
 # fourth argument (if used) is stored at 16($sp).

	.globl 	scheme_to_interface
scheme_to_interface:
	sw	$value,8($registers)
	sw	$closure_free,36($registers)
#ifdef DEBUG_INTERFACE
	lw	$value,Stack_Bottom
	addi	$0,$0,0			# Load delay
	sltu	$at,$stack,$value
	bne	$at,$0,Stack_Overflow_Detected
	addi	$0,$0,0
	lw	$value,Debug_Buffer_Pointer
	addi	$0,$0,0
	sw	$stack,0($value)        # Stack pointer
        sw	$25,4($value)           # Index
        sw	$C_arg2,8($value)       # 1st arg.
        sw	$C_arg3,12($value)      # 2nd arg.
        sw	$C_arg4,16($value)      # 3rd arg.
	addi	$value,$value,20
	la	$12,Debug_Buffer_End
	bne	$12,$value,Store_Pointer_Back
	nop
	la	$12,Debug_Buffer
	add	$value,$0,$12
Store_Pointer_Back:
	.set	at
	sw	$value,Debug_Buffer_Pointer
	lw	$value,Debug_Call_Count
	lw	$12,Debug_Call_Max
	addi	$value,$value,1
	sw	$value,Debug_Call_Count
	beq	$value,$12,Debug_Tight_Loop
	nop
	.set	noat
#endif
after_overflow:
	la	$24,utility_table	# Find table
	add	$25,$24,$25		# Address of entry
	lw	$25,0($25)		# gr25 <- Entry
	la	$24,Ext_Stack_Pointer
	sw	$stack,0($24)		# Save Scheme stack pointer
	la	$24,Free
	sw	$free,0($24)		# Save Free
	jal	$31,$25			# Off to interface code
	addi	$C_arg1,$sp,20		# Return value on C stack
	lw	$25,20($sp)		# Get dispatch address
	lw	$C_arg1,24($sp)		# Arg1 <- value component
	jal	$31,$25			# Redispatch ...
	addi	$0,$0,0			# Branch delay...

	.globl	closure_hook
closure_hook:
	# On arrival:
	# GR31 has address of JAL instruction we were supposed to have
	# executed.  This code emulates the JAL.
	# (except that R31 is already set).
	lw	$at,0($31)		# Load JAL instruction
	nop				# Load delay slot
	and	$at,$at,$addr_mask	# clear JAL opcode
	sll	$at,$at,2		# obtain destination address
	or	$at,$at,$heap_bits	# insert top bits into destination
	j	$at			# invoke
	nop				# jump delay slot

	.globl	cons_closure
cons_closure:
	# On arriveal:
	# - GR31 has the address of the manifest closure header,
	#   followed by the closure descriptor (2 words),
	#   followed by the instructions we need to continue with.
	#   The closure descriptor consists of the format+gc-offset word
	#   followed by a PC-relative JAL instruction.
	# - GR4 has the address past the first word on this closure
	#   (assuming the entry point is at closure-free).
	# - GR5 has the increment for closure-free.
	# On return:
	# - GR4 has the address of the closure
	# This code assumes that it can clobber registers 7 and at freely.
 #	lw	$7,40($registers)	# closure limit
	lw	$at,0($31)		# closure header word
	subu	$7,$7,$4		# check if it fits
	bgez	$7,cons_closure_continue
	or	$4,$closure_free,$0	# setup result
	or	$7,$31,$0		# Preserve original return address
	bgezal	$0,invoke_allocate_closure
	addi	$at,$at,2		# Total size = datum(header) + 2

cons_closure_continue:
	add	$closure_free,$closure_free,$5	# allocate
	lw	$5,4($31)		# format+gc-offset word
	lw	$7,8($31)		# JAL instruction
	sw	$0,-12($4)		# Make heap parseable
	sw	$5,-4($4)		# Store format+gc-offset
	srl	$5,$31,2		# return address -> JAL destination
	sw	$at,-8($4)		# Store closure header
	and	$5,$5,$addr_mask	# clear top bits
	addi	$31,$31,12		# Bump past structure
	addu	$5,$5,$7		# JAL instruction
	j	$31			# Return.
	sw	$5,0($4)		# Store the JAL instruction

	.globl	cons_multi
cons_multi:
	# On arriveal:
	# - GR31 has the address of the manifest closure header,
	#   followed by n closure descriptors (2 words each),
	#   followed by the instructions we need to continue with.
	#   Each closure descriptor consists of the format+gc-offset
	#   word followed by a PC-relative JAL instruction.
	# - GR4 has the address past the first word on this closure
	#   (assuming the entry point is at closure-free).
	# - GR5 has the increment for closure-free.
	# - GR6 has the number of entries (>= 1)
	# On return:
	# - GR4 has the address of the closure
	# This code assumes that it can clobber registers 7 and at freely.
 #	lw	$7,40($registers)	# closure limit
	lw	$at,0($31)		# closure header word
	subu	$7,$7,$4		# check if it fits
	bgez	$7,cons_multi_continue
	or	$4,$closure_free,$0	# setup result
	or	$7,$31,$0		# Preserve original return address
	bgezal	$0,invoke_allocate_closure
	addi	$at,$at,1		# Total size = datum(header) + 1

cons_multi_continue:
	add	$closure_free,$closure_free,$5	# allocate
	sw	$at,-12($4)		# Store closure header
	sh	$6,-8($4)		# Store number of entries
	sh	$0,-6($4)		# Tag as multi-closure
	addi	$7,$4,-4		# Pointer to closure entries
	srl	$5,$31,2		# return-address -> JAL destination
	and	$5,$5,$addr_mask	# clear top bits
	addi	$31,$31,4		# bump to first descriptor

store_loop:
	lw	$at,0($31)		# format+gc-offset word
	addi	$6,$6,-1		# decrement count
	addi	$31,$31,8		# bump pointer to block
	sw	$at,0($7)		# store into closure
	lw	$at,-4($31)		# PC-relative JAL
	addi	$7,$7,12		# bump pointer to closure
	add	$at,$at,$5		# absolute JAL instruction
	bgtz	$6,store_loop
	sw	$at,-8($7)		# store JAL instruction
	
	j	$31			# return
	nop				# delay slot

invoke_allocate_closure:
 # $at contains in its datum the minimum size to allocate.
 # $7  contains the "return address" for cons_closure or cons_multi.
 # $31 contains the return address for invoke_allocate_closure.
	addi	$sp,$sp,-80
 # 1 is at, a temp
	sw	$2,80-4($sp)
	sw	$3,80-8($sp)
	and	$4,$at,$addr_mask	# total size (- 1)
	sw	$5,80-12($sp)
	sw	$6,80-16($sp)
	sw	$7,80-20($sp)		# Original value of r31
 #	sw	$8,0($registers)	# memtop is read-only
	la	$7,Free
	sw	$9,0($7)
	sw	$10,80-24($sp)
	sw	$11,80-28($sp)
	sw	$12,80-32($sp)
	sw	$13,80-36($sp)
	sw	$14,80-40($sp)
	sw	$15,80-44($sp)
 # 16-23 are callee-saves registers.
	sw	$24,80-48($sp)
	sw	$25,80-52($sp)
 # 26-29 are taken up by the OS and the C calling convention.
 # 30 is a callee-saves register.
	sw	$31,80-60($sp)		# return address
	jal	allocate_closure
	sw	$closure_free,36($registers) # uncache

	lw	$closure_free,36($registers)
	lw	$31,80-20($sp)		# original value of r31
	lw	$25,80-52($sp)
	lw	$24,80-48($sp)
	lw	$15,80-44($sp)
	lw	$14,80-40($sp)
	lw	$13,80-36($sp)
	lw	$12,80-32($sp)
	lw	$11,80-28($sp)
	lw	$10,80-24($sp)
	lw	$9,Free
	lw	$8,0($registers)
	lw	$7,80-60($sp)		# return address for invoke...
	lw	$6,80-16($sp)
	lw	$5,80-12($sp)
	lw	$3,80-8($sp)
	lw	$2,80-4($sp)
	lw	$at,0($31)		# manifest closure header
	or	$4,$closure_free,$0	# setup result

	j	$7
	addi	$sp,$sp,80

	.globl	shortcircuit_apply
shortcircuit_apply:
	# $C_arg2 contains the procedure one cycle after this point.
	# $C_arg3 contains the frame size
	addi	$at,$0,TC_CCENTRY	# test for compiled entry
	srl	$C_arg4,$C_arg2,26
	bne	$C_arg4,$at,shortcircuit_apply_1
	and	$C_arg2,$addr_mask,$C_arg2 # procedure -> address
	or	$C_arg2,$heap_bits,$C_arg2
	lhu	$C_arg4,-4($C_arg2)	# lose if wrong arity
	addi	$at,$0,0xff
	and	$C_arg4,$at,$C_arg4
	bne	$C_arg4,$C_arg3,shortcircuit_apply_lose
	nop
	j	$C_arg2			# invoke procedure
	addi	$stack,$stack,4		# pop it too

	.globl	shortcircuit_apply_1
shortcircuit_apply_1:
	addi	$at,$0,TC_ENTITY	# Test for entity
	bne	$C_arg4,$at,shortcircuit_apply_lose
	or	$C_arg2,$heap_bits,$C_arg2 # get entity's procedure
	lw	$C_arg2,0($C_arg2)
	addi	$at,$0,TC_CCENTRY	# test for compiled entry
	srl	$C_arg4,$C_arg2,26
	bne	$C_arg4,$at,shortcircuit_apply_lose
	and	$C_arg2,$addr_mask,$C_arg2 # procedure -> address
	or	$C_arg2,$heap_bits,$C_arg2
	lhu	$C_arg4,-4($C_arg2)	# lose if wrong arity
	addi	$at,$0,0xff
	and	$C_arg4,$at,$C_arg4
	addi	$at,$C_arg3,1		# adjust for entity arg
	bne	$C_arg4,$C_arg3,shortcircuit_apply_lose
	nop
	j	$C_arg2			# invoke procedure
	nop				# don't pop entity arg

	.globl	shortcircuit_apply_lose
shortcircuit_apply_lose:
	lw	$C_arg2,0($stack)	# pop procedure into arg register
	addi	$stack,$stack,4
	la	$at,scheme_to_interface	# invoke the standard apply
	j	$at
	addi	$tramp_index,$0,80

	.globl	set_interrupt_enables
set_interrupt_enables:
	# 0($stack) has the new interrupt mask (a fixnum)
	# 4($stack) has the return address (a compiled entry)
	# $value has been set above to old interrupt mask
	lui	$at,(TC_FIXNUM*0x400)	# slap fixnum type code on value
	or	$value,$value,$at
	lw	$C_arg1,0($stack)	# get new interrupt mask
	lw	$C_arg2,48($registers)	# get interrupt code
	and	$C_arg1,$C_arg1,$addr_mask
	sw	$C_arg1,4($registers)	# store new mask in mask register
	# Now, set up the memtop and stack_guard registers.
	# Memtop is -1 if there are any pending interrupts, else
	# "MemTop" if GC interrupt is enabled, else "Heap_Top".
	and	$C_arg2,$C_arg2,$C_arg1	# get masked interrupts
	bne	$C_arg2,$0,set_interrupt_enables_1
	addi	$memtop,$0,-1
	andi	$C_arg2,$C_arg1,4	# test for GC interrupt
	lw	$memtop,MemTop
	bne	$C_arg2,$0,set_interrupt_enables_1
	nop
	lw	$memtop,Heap_Top
	.globl	set_interrupt_enables_1
set_interrupt_enables_1:
	andi	$C_arg2,$C_arg1,1	# test for stack-overflow interrupt
	sw	$memtop,0($registers)
	# Stack_guard's value depends on whether the stack-overflow
	# interrupt is enabled.
	lw	$C_arg3,Stack_Guard
	bne	$C_arg2,$0,set_interrupt_enables_2
	nop
	lw	$C_arg3,Stack_Bottom
	.globl	set_interrupt_enables_2
set_interrupt_enables_2:
	lw	$C_arg2,4($stack)	# get return address
	sw	$C_arg3,44($registers)	# store stack_guard
	and	$C_arg2,$C_arg2,$addr_mask # return to caller
	or	$C_arg2,$C_arg2,$heap_bits
	j	$C_arg2
	addi	$stack,$stack,8

 # Argument 1 (in $C_arg1) is the returned value
	.globl interface_to_C
interface_to_C:
	l.d	$f20,32($sp)
	l.d	$f22,40($sp)
	l.d	$f24,48($sp)
	l.d	$f26,56($sp)
	l.d	$f28,64($sp)
	l.d	$f30,72($sp)
	lw	$16,80($sp)
	lw	$17,84($sp)
	lw	$18,88($sp)
	lw	$19,92($sp)
	lw	$20,96($sp)
	lw	$21,100($sp)
	lw	$22,104($sp)
	lw	$23,108($sp)
	lw	$30,112($sp)
	lw	$31,116($sp)
	addi	$sp,$sp,120		# Pop stack back
	j	$31			# Return
	add	$2,$0,$C_arg1		# Return value to C
	.end	C_to_interface

#ifdef DEBUG_INTERFACE
	.globl	Stack_Overflow_Detected
Stack_Overflow_Detected:
	j	after_overflow
	nop

	.globl	Bad_Free_Pointer
Bad_Free_Pointer:
	j	Continue_Past_Free_Problem
	nop
#endif

	.globl	interface_initialize
	.ent	interface_initialize
interface_initialize:
	.frame	$sp,0,$31
	cfc1	$25,$31		# read FPU control register
	nop
	ori	$25,$25,0xf00	# enable V, Z, O, U traps
	ctc1	$25,$31		# write FPU control register
	nop
	j	$31		# return
	nop
	.end	interface_initialize

	.globl	Debug_Tight_Loop
	.ent	Debug_Tight_Loop
Debug_Tight_Loop:
	beq	$12,$value,Debug_Tight_Loop
	nop
	j	after_overflow
	.end	Debug_Tight_Loop

#ifdef DEBUG_INTERFACE
	.data
	.globl	Debug_Buffer_Pointer
Debug_Buffer_Pointer:
	.word	0
	.globl	Debug_Buffer
Debug_Buffer:
	.word	0:30
Debug_Buffer_End:
	.word	0
	.globl	Debug_Call_Count
Debug_Call_Count:
	.word	0
	.globl	Debug_Call_Max
Debug_Call_Max:
	.word	0
#endif


syntax highlighted by Code2HTML, v. 0.9.1