### -*-Midas-*-
###
### $Id: i386.m4,v 1.57 2002/03/11 21:39:18 cph Exp $
###
### Copyright (c) 1992-2002 Massachusetts Institute of Technology
###
### This program is free software; you can redistribute it and/or
### modify it under the terms of the GNU General Public License as
### published by the Free Software Foundation; either version 2 of the
### License, or (at your option) any later version.
###
### This program is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
### General Public License for more details.
###
### You should have received a copy of the GNU General Public License
### along with this program; if not, write to the Free Software
### Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
### 02111-1307, USA.
### Intel IA-32 assembly language part of the compiled code interface.
### See cmpint.txt, cmpint.c, cmpint-mc68k.h, and cmpgc.h for more
### documentation.
###
### This m4 source expands into either Unix (gas) source or PC
### (masm/wasm) source.
###
### NOTE:
### Assumptions:
###
### 0) Segment registers and paging are set up for 32-bit "flat"
### operation.
###
### 1) All registers (except double floating point registers) and
### stack locations hold a C long object.
###
### 2) The C compiler divides registers into three groups:
### - Linkage registers, used for procedure calls and global
### references. On i386 (gcc and Zortech C): %ebp, %esp.
### - super temporaries, not preserved accross procedure calls and
### always usable. On i386 (gcc and Zortech C): %eax, %edx, %ecx.
### - preserved registers saved by the callee if they are written.
### On i386 (gcc and Zortech C): all others (%ebx, %esi, %edi).
###
### 3) Arguments, if passed on a stack, are popped by the caller
### or by the procedure return instruction (as on the VAX). Thus
### most "leaf" procedures need not worry about them. On i386,
### arguments are passed on the stack.
###
### 4) There is a hardware or software maintained stack for
### control. The procedure calling sequence may leave return
### addresses in registers, but they must be saved somewhere for
### nested calls and recursive procedures. On i386: saved on
### the stack by the CALL instruction.
###
### 5) C procedures return long values in a super temporary
### register. Two word structures are returned differently,
### depending on the C compiler used. When using GCC, two-word
### structures are returned in {%eax, %edx}. When using a
### compiler compatible with MicroSoft's C compiler (e.g. Zortech
### C), two word structures are returned by returning in %eax the
### address of a structure allocated statically. If the Scheme
### system ever becomes reentrant, this will have to change.
###
### 6) Floating point registers are not preserved by this
### interface. The interface is only called from the Scheme
### interpreter, which does not use floating point data. Thus
### although the calling convention would require us to preserve
### them, they contain garbage.
###
### Compiled Scheme code uses the following register convention:
### - %esp containts the Scheme stack pointer, not the C stack
### pointer.
### - %esi contains a pointer to the Scheme interpreter's "register"
### block. This block contains the compiler's copy of MemTop,
### the interpreter's registers (val, env, exp, etc.),
### temporary locations for compiled code, and the addresses
### of various hooks defined in this file.
### - %edi contains the Scheme free pointer.
### - %ebp contains the Scheme datum mask.
### The dynamic link (when needed) is in Registers[REGBLOCK_COMPILER_TEMP]
### Values are returned in Registers[REGBLOCK_VAL]
###
### All other registers (%eax, %edx, %ecx, %ebx) are available to
### the compiler. A caller-saves convention is used, so the
### registers need not be preserved by subprocedures.
### The following m4 macros can be defined to change how this file is
### expanded.
###
### DASM
### If defined, expand to Intel assembly-language syntax, used by
### Microsoft assembler (MASM) and Watcom assembler (WASM).
### Otherwise, expand to AT&T syntax, used by GAS.
###
### WIN32
### If defined, expand to run under Win32; implies DASM.
### OS2
### If defined, expand to run under OS/2. This macro does nothing
### more than define SUPPRESS_LEADING_UNDERSCORE and
### CALLER_ALLOCS_STRUCT_RETURN, which are the conventions used to
### call OS/2 API procedures; note that EMX/GCC doesn't define
### these symbols because it thinks it's running under unix.
###
### If none of { WIN32, OS2 } is defined, expansion is for unix.
###
### SUPPRESS_LEADING_UNDERSCORE
### If defined, external symbol names are generated as written;
### otherwise, they have an underscore prepended to them.
### CALLER_ALLOCS_STRUCT_RETURN
### STATIC_STRUCT_RETURN
### Controls the conventions used to return 8-byte structs from C
### procedures. If CALLER_ALLOCS_STRUCT_RETURN is defined, the
### caller allocates space on the stack and passes a pointer to
### that space on the top of the stack. If STATIC_STRUCT_RETURN
### is defined, the callee returns a pointer to a static struct in
### EAX. Otherwise, the callee returns the struct in EAX/EDX.
### CALLEE_POPS_STRUCT_RETURN
### Modifies the CALLER_ALLOCS_STRUCT_RETURN calling convention.
### Under the modified convention, the callee pops the pointer to
### the allocated space, so the caller doesn't have to. This
### convention is used by GCC 2.9.x.
### WCC386
### Should be defined when using Watcom assembler.
### WCC386R
### Should be defined when using Watcom assembler and generating
### code to use the Watcom register-based argument conventions.
### LINUX_ELF
### If defined, expand to run under Linux ELF.
### TYPE_CODE_LENGTH
### Normally defined to be 6. Don't change this unless you know
### what you're doing.
### DISABLE_387
### If defined, do not generate 387 floating-point instructions.
#### Utility macros and definitions
ifdef(`WIN32',
`define(IF_WIN32,`$1')',
`define(IF_WIN32,`')')
ifdef(`OS2',
`define(IFOS2,`$1')',
`define(IFOS2,`')')
ifdef(`LINUX_ELF',
`define(IF_LINUX_ELF,`$1')',
`define(IF_LINUX_ELF,`')')
ifdef(`DISABLE_387',
`define(IF387,`')',
`define(IF387,`$1')')
ifdef(`DISABLE_387',
`define(IFN387,`$1')',
`define(IFN387,`')')
IF_WIN32(`define(DASM,1)')
ifdef(`WCC386R',`define(WCC386,1)')
ifdef(`DASM',
`define(IFDASM,`$1')',
`define(IFDASM,`')')
ifdef(`DASM',
`define(IFNDASM,`')',
`define(IFNDASM,`$1')')
ifdef(`DASM',
`define(use_external_data,` extrn $1':dword)',
`define(use_external_data,`')')
ifdef(`DASM',
`define(use_external_code,` extrn $1':near)',
`define(use_external_code,`')')
ifdef(`DASM',
`define(export_label,` public $1')',
`define(export_label,` .globl $1')')
IFNDASM(` .file "cmpaux-i386.s"')
# GAS doesn't implement these, for no obvious reason.
IFNDASM(`define(pushad,`pusha')')
IFNDASM(`define(popad,`popa')')
IFNDASM(`define(pushfd,`pushf')')
IFNDASM(`define(popfd,`popf')')
IFOS2(`define(`SUPPRESS_LEADING_UNDERSCORE',1)')
IF_LINUX_ELF(`define(`SUPPRESS_LEADING_UNDERSCORE',1)')
ifdef(`WCC386R',
`define(EVR,`_$1')',
`ifdef(`SUPPRESS_LEADING_UNDERSCORE',
`define(EVR,`$1')',
`define(EVR,`_$1')')')
# When using the Watcom C compiler with register-based calling
# conventions, source-code function names normally expand to `FOO_',
# but functions that are compiled with prefix keywords such as
# `__cdecl' or `__syscall' expand differently. References to the
# former type of name are marked with `EFR', while references to the
# latter are marked with `EPFR'.
ifdef(`SUPPRESS_LEADING_UNDERSCORE',
`define(EPFR,`$1')',
`define(EPFR,`_$1')')
ifdef(`WCC386R',
`define(EFR,`$1_')',
`define(EFR,`EPFR($1)')')
define(hook_reference,`EFR(asm_$1)')
define(define_data,`export_label(EVR($1))')
define(define_code_label,`
export_label($1)
$1:')
define(define_c_label,`define_code_label(EPFR($1))')
define(define_debugging_label,`define_code_label($1)')
define(define_hook_label,`define_code_label(hook_reference($1))')
ifdef(`DASM',
`define(DECLARE_DATA_SEGMENT,` .data')',
`define(DECLARE_DATA_SEGMENT,` .data')')
ifdef(`DASM',
`define(DECLARE_CODE_SEGMENT,` .code')',
`define(DECLARE_CODE_SEGMENT,` .text')')
ifdef(`DASM',
`define(declare_alignment,` align $1')',
`define(declare_alignment,` .align $1')')
ifdef(`DASM',
`define(allocate_word,`EVR($1) dw 0')',
`define(allocate_word,` .comm EVR($1),2')')
ifdef(`DASM',
`define(allocate_longword,`EVR($1) dd 0')',
`define(allocate_longword,` .comm EVR($1),4')')
ifdef(`DASM',
`define(allocate_space,`EVR($1) db $2 dup (0)')',
`define(allocate_space,`EVR($1):
.space $2')')
ifdef(`DASM',
`define(HEX, `0$1H')',
`define(HEX, `0x$1')')
ifdef(`DASM',
`define(OP,`$1$3')',
`define(OP,`$1$2')')
ifdef(`DASM',
`define(TW,`$2,$1')',
`define(TW,`$1,$2')')
ifdef(`DASM',
`define(ABS, `dword ptr $1')',
`define(ABS, `$1')')
ifdef(`DASM',
`define(IMM, `$1')',
`define(IMM, `$$1')')
ifdef(`DASM',
`define(REG,`$1')',
`define(REG,`%$1')')
ifdef(`DASM',
`define(ST,`st($1)')',
`define(ST,`%st ($1)')')
ifdef(`DASM',
`define(IND,`dword ptr [$1]')',
`define(IND,`($1)')')
ifdef(`DASM',
`define(BOF,`byte ptr $1[$2]')',
`define(BOF,`$1($2)')')
ifdef(`DASM',
`define(WOF,`word ptr $1[$2]')',
`define(WOF,`$1($2)')')
ifdef(`DASM',
`define(LOF,`dword ptr $1[$2]')',
`define(LOF,`$1($2)')')
ifdef(`DASM',
`define(DOF,`qword ptr $1[$2]')',
`define(DOF,`$1($2)')')
ifdef(`DASM',
`define(IDX,`dword ptr [$1] [$2]')',
`define(IDX,`($1,$2)')')
ifdef(`DASM',
`define(SDX,`dword ptr $1[$2*$3]')',
`define(SDX,`$1(,$2,$3)')')
ifdef(`DASM',
`define(IJMP,`$1')',
`define(IJMP,`*$1')')
define(TC_LENGTH, ifdef(`TYPE_CODE_LENGTH', TYPE_CODE_LENGTH, 6))
define(DATUM_LENGTH, eval(32 - TC_LENGTH))
define(DATUM_SHIFT, eval(1 << DATUM_LENGTH))
define(ADDRESS_MASK, eval(DATUM_SHIFT - 1))
define(TAG, ($2 + ($1 * DATUM_SHIFT)))
define(TC_FALSE,0)
define(TC_FLONUM,6)
define(TC_TRUE,8)
define(TC_FIXNUM,26)
define(TC_MANIFEST_NM_VECTOR,39)
define(TC_COMPILED_ENTRY,40)
define(REGBLOCK_VAL,8)
define(REGBLOCK_COMPILER_TEMP,16)
define(REGBLOCK_LEXPR_ACTUALS,28)
define(REGBLOCK_PRIMITIVE,32)
define(REGBLOCK_CLOSURE_FREE,36)
define(REGBLOCK_DLINK,REGBLOCK_COMPILER_TEMP)
define(REGBLOCK_UTILITY_ARG4,REGBLOCK_CLOSURE_FREE)
define(COMPILER_REGBLOCK_N_FIXED,16)
define(COMPILER_REGBLOCK_N_HOOKS,80)
define(COMPILER_REGBLOCK_N_TEMPS,256)
define(COMPILER_FIXED_SIZE,1)
define(COMPILER_HOOK_SIZE,1)
define(COMPILER_TEMP_SIZE,3)
define(REGBLOCK_SIZE_IN_OBJECTS,
eval((COMPILER_REGBLOCK_N_FIXED*COMPILER_FIXED_SIZE)
+(COMPILER_REGBLOCK_N_HOOKS*COMPILER_HOOK_SIZE)
+(COMPILER_REGBLOCK_N_TEMPS*COMPILER_TEMP_SIZE)))
# Define the floating-point processor control word. Always set
# round-to-even and double precision. Under Win32, mask all
# exceptions. Under unix and OS/2, mask only the inexact result
# exception.
ifdef(`WIN32',
`define(FP_CONTROL_WORD,HEX(023f))',
`define(FP_CONTROL_WORD,HEX(0220))')
define(regs,REG(esi))
define(rfree,REG(edi))
define(rmask,REG(ebp))
IFDASM(`.586p
.model flat')
DECLARE_DATA_SEGMENT()
declare_alignment(2)
use_external_data(EVR(Free))
use_external_data(EVR(Ext_Stack_Pointer))
use_external_data(EVR(utility_table))
ifdef(`WIN32',`
use_external_data(EVR(RegistersPtr))
',`
define_data(Regstart)
allocate_space(Regstart,128)
define_data(Registers)
allocate_space(Registers,eval(REGBLOCK_SIZE_IN_OBJECTS*4))
')
define_data(i387_presence)
allocate_longword(i387_presence)
define_data(C_Stack_Pointer)
allocate_longword(C_Stack_Pointer)
define_data(C_Frame_Pointer)
allocate_longword(C_Frame_Pointer)
IFOS2(`define(CALLER_ALLOCS_STRUCT_RETURN,1)')
IF_LINUX_ELF(`define(CALLER_ALLOCS_STRUCT_RETURN,1)')
IF_WIN32(`ifdef(`WCC386', `define(`STATIC_STRUCT_RETURN',1)')')
define_data(ia32_cpuid_supported)
allocate_longword(ia32_cpuid_supported)
define_data(ia32_cpuid_needed)
allocate_longword(ia32_cpuid_needed)
DECLARE_CODE_SEGMENT()
declare_alignment(2)
define_c_label(i386_interface_initialize)
OP(push,l) REG(ebp)
OP(mov,l) TW(REG(esp),REG(ebp))
OP(xor,l) TW(REG(eax),REG(eax)) # No 387 available
# Unfortunately, the `movl cr0,ecx' instruction is privileged.
# Use the deprecated `smsw cx' instruction instead.
IF387(`
# OP(mov,l) TW(REG(cr0),REG(ecx)) # Test for 387 presence
smsw REG(cx)
OP(mov,l) TW(IMM(HEX(12)),REG(edx))
OP(and,l) TW(REG(edx),REG(ecx))
OP(cmp,l) TW(REG(edx),REG(ecx))
jne i386_initialize_no_fp
OP(inc,l) REG(eax) # 387 available
OP(sub,l) TW(IMM(4),REG(esp))
fclex
fnstcw WOF(-2,REG(ebp))
OP(and,w) TW(IMM(HEX(f0e0)),WOF(-2,REG(ebp)))
OP(or,w) TW(IMM(FP_CONTROL_WORD),WOF(-2,REG(ebp)))
fldcw WOF(-2,REG(ebp))
i386_initialize_no_fp:
')
OP(mov,l) TW(REG(eax),ABS(EVR(i387_presence)))
# Do a bunch of hair to determine if we need to do cache synchronization.
# First, test to see if the CPUID instruction is supported.
OP(xor,l) TW(REG(eax),REG(eax))
OP(mov,l) TW(REG(eax),ABS(EVR(ia32_cpuid_supported)))
OP(mov,l) TW(REG(eax),ABS(EVR(ia32_cpuid_needed)))
pushfd
OP(pop,l) REG(eax)
OP(mov,l) TW(REG(eax),REG(ecx))
OP(xor,l) TW(IMM(HEX(00040000)),REG(eax))
OP(push,l) REG(eax)
popfd
pushfd
OP(pop,l) REG(eax)
OP(xor,l) TW(REG(ecx),REG(eax))
jz no_cpuid_instr
# Restore original EFLAGS.
OP(push,l) REG(ecx)
popfd
# Now we know that cpuid is supported.
OP(mov,l) TW(IMM(HEX(00000001)),ABS(EVR(ia32_cpuid_supported)))
# Next, use the CPUID instruction to determine the processor type.
OP(push,l) REG(ebx)
OP(xor,l) TW(REG(eax),REG(eax))
cpuid
# Check that CPUID accepts argument 1.
OP(cmp,l) TW(IMM(HEX(00000001)),REG(eax))
jl done_setting_up_cpuid
# Detect "GenuineIntel".
# OP(cmp,l) TW(IMM(HEX(756e6547)),REG(ebx))
# jne not_intel_cpu
# OP(cmp,l) TW(IMM(HEX(49656e69)),REG(edx))
# jne not_intel_cpu
# OP(cmp,l) TW(IMM(HEX(6c65746e)),REG(ecx))
# jne not_intel_cpu
# For CPU families 4 (486), 5 (Pentium), or 6 (Pentium Pro, Pentium
# II, Pentium III), don't use CPUID synchronization.
# OP(mov,l) TW(IMM(HEX(01)),REG(eax))
# cpuid
# OP(shr,l) TW(IMM(HEX(08)),REG(eax))
# OP(and,l) TW(IMM(HEX(0000000F)),REG(eax))
# OP(cmp,l) TW(IMM(HEX(4)),REG(eax))
# jl done_setting_up_cpuid
# OP(cmp,l) TW(IMM(HEX(6)),REG(eax))
# jg done_setting_up_cpuid
#
# jmp cpuid_not_needed
#
#not_intel_cpu:
# Detect "AuthenticAMD".
OP(cmp,l) TW(IMM(HEX(68747541)),REG(ebx))
jne not_amd_cpu
OP(cmp,l) TW(IMM(HEX(69746e65)),REG(edx))
jne not_amd_cpu
OP(cmp,l) TW(IMM(HEX(444d4163)),REG(ecx))
jne not_amd_cpu
# Problem appears to exist only on Athlon models 1, 3, and 4.
OP(mov,l) TW(IMM(HEX(01)),REG(eax))
cpuid
OP(mov,l) TW(REG(eax),REG(ecx))
OP(shr,l) TW(IMM(HEX(08)),REG(eax))
OP(and,l) TW(IMM(HEX(0000000F)),REG(eax))
OP(cmp,l) TW(IMM(HEX(6)),REG(eax)) # family 6 = Athlon
jne done_setting_up_cpuid
OP(mov,l) TW(REG(ecx),REG(eax))
OP(shr,l) TW(IMM(HEX(04)),REG(eax))
OP(and,l) TW(IMM(HEX(0000000F)),REG(eax))
OP(cmp,l) TW(IMM(HEX(6)),REG(eax)) # model 6 and up OK
jge done_setting_up_cpuid
OP(cmp,l) TW(IMM(HEX(2)),REG(eax)) # model 2 OK
je done_setting_up_cpuid
OP(mov,l) TW(IMM(HEX(00000001)),ABS(EVR(ia32_cpuid_needed)))
not_amd_cpu:
done_setting_up_cpuid:
OP(pop,l) REG(ebx)
no_cpuid_instr:
leave
ret
define_c_label(C_to_interface)
OP(push,l) REG(ebp) # Link according
OP(mov,l) TW(REG(esp),REG(ebp)) # to C's conventions
OP(push,l) REG(edi) # Save callee-saves
OP(push,l) REG(esi) # registers
OP(push,l) REG(ebx)
OP(mov,l) TW(LOF(8,REG(ebp)),REG(edx)) # Entry point
# Preserve frame ptr
OP(mov,l) TW(REG(ebp),EVR(C_Frame_Pointer))
# Preserve stack ptr
OP(mov,l) TW(REG(esp),EVR(C_Stack_Pointer))
# Register block = %esi
# Scheme offset in NT
ifdef(`WIN32',
` OP(mov,l) TW(ABS(EVR(RegistersPtr)),regs)',
` OP(lea,l) TW(ABS(EVR(Registers)),regs)')
jmp EPFR(interface_to_scheme)
define_hook_label(trampoline_to_interface)
define_debugging_label(trampoline_to_interface)
OP(pop,l) REG(ecx) # trampoline storage
jmp scheme_to_interface
define_hook_label(scheme_to_interface_call)
define_debugging_label(scheme_to_interface_call)
OP(pop,l) REG(ecx) # arg1 = ret. add
OP(add,l) TW(IMM(4),REG(ecx)) # Skip format info
# jmp scheme_to_interface
define_hook_label(scheme_to_interface)
define_debugging_label(scheme_to_interface)
IF387(`
OP(cmp,l) TW(IMM(0),ABS(EVR(i387_presence)))
je scheme_to_interface_proceed
ffree ST(0) # Free floating "regs"
ffree ST(1)
ffree ST(2)
ffree ST(3)
ffree ST(4)
ffree ST(5)
ffree ST(6)
ffree ST(7)
scheme_to_interface_proceed:
')
OP(mov,l) TW(REG(esp),EVR(Ext_Stack_Pointer))
OP(mov,l) TW(rfree,EVR(Free))
OP(mov,l) TW(EVR(C_Stack_Pointer),REG(esp))
OP(mov,l) TW(EVR(C_Frame_Pointer),REG(ebp))
ifdef(`CALLER_ALLOCS_STRUCT_RETURN',`
OP(sub,l) TW(IMM(8),REG(esp)) # alloc space for struct return
')
OP(push,l) LOF(REGBLOCK_UTILITY_ARG4(),regs) # Utility args
OP(push,l) REG(ebx)
OP(push,l) REG(edx)
OP(push,l) REG(ecx)
ifdef(`CALLER_ALLOCS_STRUCT_RETURN',`
OP(mov,l) TW(REG(esp),REG(ecx)) # push pointer to struct return
OP(add,l) TW(IMM(16),REG(ecx))
OP(push,l) REG(ecx)
')
OP(xor,l) TW(REG(ecx),REG(ecx))
OP(mov,b) TW(REG(al),REG(cl))
OP(mov,l) TW(SDX(EVR(utility_table),REG(ecx),4),REG(eax))
call IJMP(REG(eax))
define_debugging_label(scheme_to_interface_return)
ifdef(`CALLER_ALLOCS_STRUCT_RETURN',`
ifdef(`CALLEE_POPS_STRUCT_RETURN',`',`
OP(add,l) TW(IMM(4),REG(esp)) # pop pointer to struct return
')')
OP(add,l) TW(IMM(16),REG(esp)) # Pop utility args
ifdef(`STATIC_STRUCT_RETURN',`
OP(mov,l) TW(LOF(4,REG(eax)),REG(edx))
OP(mov,l) TW(IND(REG(eax)),REG(eax))
')
ifdef(`CALLER_ALLOCS_STRUCT_RETURN',`
OP(pop,l) REG(eax) # Pop struct return into registers
OP(pop,l) REG(edx)
')
jmp IJMP(REG(eax)) # Invoke handler
define_c_label(interface_to_scheme)
IF387(`
OP(cmp,l) TW(IMM(0),ABS(EVR(i387_presence)))
je interface_to_scheme_proceed
ffree ST(0) # Free floating "regs"
ffree ST(1)
ffree ST(2)
ffree ST(3)
ffree ST(4)
ffree ST(5)
ffree ST(6)
ffree ST(7)
interface_to_scheme_proceed:
')
OP(mov,l) TW(EVR(Free),rfree) # Free pointer = %edi
OP(mov,l) TW(LOF(REGBLOCK_VAL(),regs),REG(eax)) # Value/dynamic link
OP(mov,l) TW(IMM(ADDRESS_MASK),rmask) # = %ebp
OP(mov,l) TW(EVR(Ext_Stack_Pointer),REG(esp))
OP(mov,l) TW(REG(eax),REG(ecx)) # Preserve if used
OP(and,l) TW(rmask,REG(ecx)) # Restore potential dynamic link
OP(mov,l) TW(REG(ecx),LOF(REGBLOCK_DLINK(),regs))
jmp IJMP(REG(edx))
IF_WIN32(`
use_external_code(EFR(WinntExceptionTransferHook))
define_code_label(EFR(callWinntExceptionTransferHook))
call EFR(WinntExceptionTransferHook)
mov edx,eax
')
define_c_label(interface_to_C)
IF387(`
OP(cmp,l) TW(IMM(0),ABS(EVR(i387_presence)))
je interface_to_C_proceed
ffree ST(0) # Free floating "regs"
ffree ST(1)
ffree ST(2)
ffree ST(3)
ffree ST(4)
ffree ST(5)
ffree ST(6)
ffree ST(7)
interface_to_C_proceed:')
OP(mov,l) TW(REG(edx),REG(eax)) # Set up result
OP(pop,l) REG(ebx) # Restore callee-saves
OP(pop,l) REG(esi) # registers
OP(pop,l) REG(edi)
leave
ret
define_code_label(EFR(ia32_cache_synchronize))
OP(push,l) REG(ebp)
OP(mov,l) TW(REG(esp),REG(ebp))
OP(push,l) REG(ebx)
OP(xor,l) TW(REG(eax),REG(eax))
cpuid
OP(pop,l) REG(ebx)
leave
ret
### Run the CPUID instruction for serialization.
define_hook_label(serialize_cache)
pushad
OP(xor,l) TW(REG(eax),REG(eax))
cpuid
popad
ret
### Stub to be used in place of above on machines that don't need it.
define_hook_label(dont_serialize_cache)
ret
### Assembly language hooks used to reduce code size.
### There is no time advantage to using these over using
### scheme_to_interface (or scheme_to_interface_call), but the
### code generated by the compiler can be somewhat smaller.
define(define_jump_indirection,
`define_hook_label($1)
OP(mov,b) TW(IMM(HEX($2)),REG(al))
jmp scheme_to_interface')
define(define_call_indirection,
`define_hook_label($1)
OP(mov,b) TW(IMM(HEX($2)),REG(al))
jmp scheme_to_interface_call')
define_call_indirection(interrupt_procedure,1a)
define_call_indirection(interrupt_continuation,1b)
define_jump_indirection(interrupt_closure,18)
define_jump_indirection(interrupt_continuation_2,3b)
define_hook_label(interrupt_dlink)
OP(mov,l) TW(LOF(REGBLOCK_DLINK(),regs),REG(edx))
OP(mov,b) TW(IMM(HEX(19)),REG(al))
jmp scheme_to_interface_call
###
### This saves even more instructions than primitive_apply
### When the PC is not available. Instead of jumping here,
### a call instruction is used, and the longword offset to
### the primitive object follows the call instruction.
### This code loads the primitive object and merges with
### apply_primitive
###
declare_alignment(2)
define_hook_label(short_primitive_apply)
OP(pop,l) REG(edx) # offset pointer
OP(mov,l) TW(IND(REG(edx)),REG(ecx)) # offset
# Primitive object
OP(mov,l) TW(IDX(REG(edx),REG(ecx)),REG(ecx))
# Merge
jmp hook_reference(primitive_apply)
declare_alignment(2)
define_jump_indirection(primitive_apply,12)
define_jump_indirection(primitive_lexpr_apply,13)
define_jump_indirection(error,15)
define_call_indirection(link,17)
define_call_indirection(assignment_trap,1d)
define_call_indirection(reference_trap,1f)
define_call_indirection(safe_reference_trap,20)
define_call_indirection(primitive_error,36)
### Assembly language hooks used to increase speed.
# define_jump_indirection(sc_apply,14)
#
# define(define_apply_fixed_size,
# `define_hook_label(sc_apply_size_$1)
# OP(mov,l) TW(IMM($1),REG(edx))
# OP(mov,b) TW(IMM(HEX(14)),REG(al))
# jmp scheme_to_interface')
declare_alignment(2)
define_hook_label(sc_apply)
OP(mov,l) TW(REG(ecx),REG(eax)) # Copy for type code
OP(mov,l) TW(REG(ecx),REG(ebx)) # Copy for address
OP(shr,l) TW(IMM(DATUM_LENGTH),REG(eax)) # Select type code
OP(and,l) TW(rmask,REG(ebx)) # Select datum
OP(cmp,b) TW(IMM(TC_COMPILED_ENTRY),REG(al))
jne asm_sc_apply_generic
OP(movs,bl,x) TW(BOF(-4,REG(ebx)),REG(eax)) # Extract frame size
OP(cmp,l) TW(REG(eax),REG(edx)) # Compare to nargs+1
jne asm_sc_apply_generic
jmp IJMP(REG(ebx)) # Invoke
define_debugging_label(asm_sc_apply_generic)
OP(mov,l) TW(IMM(HEX(14)),REG(eax))
jmp scheme_to_interface
define(define_apply_fixed_size,
`declare_alignment(2)
define_hook_label(sc_apply_size_$1)
OP(mov,l) TW(REG(ecx),REG(eax)) # Copy for type code
OP(mov,l) TW(REG(ecx),REG(ebx)) # Copy for address
OP(shr,l) TW(IMM(DATUM_LENGTH),REG(eax)) # Select type code
OP(and,l) TW(rmask,REG(ebx)) # Select datum
OP(cmp,b) TW(IMM(TC_COMPILED_ENTRY),REG(al))
jne asm_sc_apply_generic_$1
OP(cmp,b) TW(IMM($1),BOF(-4,REG(ebx))) # Compare frame size
jne asm_sc_apply_generic_$1 # to nargs+1
jmp IJMP(REG(ebx))
asm_sc_apply_generic_$1:
OP(mov,l) TW(IMM($1),REG(edx))
OP(mov,b) TW(IMM(HEX(14)),REG(al))
jmp scheme_to_interface')
define_apply_fixed_size(1)
define_apply_fixed_size(2)
define_apply_fixed_size(3)
define_apply_fixed_size(4)
define_apply_fixed_size(5)
define_apply_fixed_size(6)
define_apply_fixed_size(7)
define_apply_fixed_size(8)
### The following code is used by generic arithmetic
### whether the fixnum case is open-coded in line or not.
### This takes care of fixnums and flonums so that the common
### numeric types are much faster than the rare ones
### (bignums, ratnums, recnums)
IF387(`declare_alignment(2)
asm_generic_flonum_result:
OP(mov,l) TW(IMM(eval(TAG(TC_MANIFEST_NM_VECTOR,2))),IND(rfree))
OP(mov,l) TW(rfree,REG(eax))
OP(fstp,l) DOF(4,rfree) # fstpd
OP(or,l) TW(IMM(eval(TAG(TC_FLONUM,0))),REG(eax))
OP(and,l) TW(rmask,IND(REG(esp)))
OP(add,l) TW(IMM(12),rfree)
OP(mov,l) TW(REG(eax),LOF(REGBLOCK_VAL(),regs))
ret
declare_alignment(2)
asm_generic_fixnum_result:
OP(and,l) TW(rmask,IND(REG(esp)))
OP(or,b) TW(IMM(TC_FIXNUM),REG(al))
OP(ror,l) TW(IMM(TC_LENGTH),REG(eax))
OP(mov,l) TW(REG(eax),LOF(REGBLOCK_VAL(),regs))
ret
declare_alignment(2)
asm_generic_return_sharp_t:
OP(and,l) TW(rmask,IND(REG(esp)))
OP(mov,l) TW(IMM(eval(TAG(TC_TRUE,0))),LOF(REGBLOCK_VAL(),regs))
ret
declare_alignment(2)
asm_generic_return_sharp_f:
OP(and,l) TW(rmask,IND(REG(esp)))
OP(mov,l) TW(IMM(eval(TAG(TC_FALSE,0))),LOF(REGBLOCK_VAL(),regs))
ret')
define(define_unary_operation,
`declare_alignment(2)
define_hook_label(generic_$1)
OP(pop,l) REG(edx)
OP(mov,l) TW(REG(edx),REG(eax))
OP(shr,l) TW(IMM(DATUM_LENGTH),REG(eax))
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
je asm_generic_$1_fix
OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
jne asm_generic_$1_fail
OP(and,l) TW(rmask,REG(edx))
fld1
OP($4,l) DOF(4,REG(edx))
jmp asm_generic_flonum_result
asm_generic_$1_fix:
OP(mov,l) TW(REG(edx),REG(eax))
OP(shl,l) TW(IMM(TC_LENGTH),REG(eax))
OP($3,l) TW(IMM(eval(1 << TC_LENGTH)),REG(eax))
jno asm_generic_fixnum_result
asm_generic_$1_fail:
OP(push,l) REG(edx)
OP(mov,b) TW(IMM(HEX($2)),REG(al))
jmp scheme_to_interface')
define(define_unary_predicate,
`declare_alignment(2)
define_hook_label(generic_$1)
OP(pop,l) REG(edx)
OP(mov,l) TW(REG(edx),REG(eax))
OP(shr,l) TW(IMM(DATUM_LENGTH),REG(eax))
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
je asm_generic_$1_fix
OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
jne asm_generic_$1_fail
OP(and,l) TW(rmask,REG(edx))
OP(fld,l) DOF(4,REG(edx))
ftst
fstsw REG(ax)
fstp ST(0)
sahf
$4 asm_generic_return_sharp_t
jmp asm_generic_return_sharp_f
asm_generic_$1_fix:
OP(mov,l) TW(REG(edx),REG(eax))
OP(shl,l) TW(IMM(TC_LENGTH),REG(eax))
OP(cmp,l) TW(IMM(0),REG(eax))
$3 asm_generic_return_sharp_t
jmp asm_generic_return_sharp_f
asm_generic_$1_fail:
OP(push,l) REG(edx)
OP(mov,b) TW(IMM(HEX($2)),REG(al))
jmp scheme_to_interface')
define(define_binary_operation,
`declare_alignment(2)
define_hook_label(generic_$1)
OP(pop,l) REG(edx)
OP(pop,l) REG(ebx)
OP(mov,l) TW(REG(edx),REG(eax))
OP(mov,l) TW(REG(ebx),REG(ecx))
OP(shr,l) TW(IMM(DATUM_LENGTH),REG(eax))
OP(shr,l) TW(IMM(DATUM_LENGTH),REG(ecx))
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
je asm_generic_$1_fix
OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
jne asm_generic_$1_fail
OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl))
je asm_generic_$1_flo_flo
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(cl))
jne asm_generic_$1_fail
OP(shl,l) TW(IMM(TC_LENGTH),REG(ebx))
OP(and,l) TW(rmask,REG(edx))
OP(sar,l) TW(IMM(TC_LENGTH),REG(ebx))
OP(fld,l) DOF(4,REG(edx)) # fldd
OP(mov,l) TW(REG(ebx),IND(rfree))
OP($5,l) IND(rfree) # fisubl
jmp asm_generic_flonum_result
asm_generic_$1_fix:
OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl))
je asm_generic_$1_fix_flo
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(cl))
jne asm_generic_$1_fail
OP(mov,l) TW(REG(edx),REG(eax))
OP(mov,l) TW(REG(ebx),REG(ecx))
OP(shl,l) TW(IMM(TC_LENGTH),REG(eax))
OP(shl,l) TW(IMM(TC_LENGTH),REG(ecx))
OP($3,l) TW(REG(ecx),REG(eax)) # subl
jno asm_generic_fixnum_result
asm_generic_$1_fail:
OP(push,l) REG(ebx)
OP(push,l) REG(edx)
OP(mov,b) TW(IMM(HEX($2)),REG(al))
jmp scheme_to_interface
asm_generic_$1_flo_flo:
OP(and,l) TW(rmask,REG(edx))
OP(and,l) TW(rmask,REG(ebx))
OP(fld,l) DOF(4,REG(edx)) # fldd
OP($6,l) DOF(4,REG(ebx)) # fsubl
jmp asm_generic_flonum_result
asm_generic_$1_fix_flo:
OP(shl,l) TW(IMM(TC_LENGTH),REG(edx))
OP(and,l) TW(rmask,REG(ebx))
OP(sar,l) TW(IMM(TC_LENGTH),REG(edx))
OP(fld,l) DOF(4,REG(ebx)) # fldd
OP(mov,l) TW(REG(edx),IND(rfree))
OP($4,l) IND(rfree) # fisubrl
jmp asm_generic_flonum_result')
IF387(`declare_alignment(2)
define_hook_label(generic_divide)
OP(pop,l) REG(edx)
OP(pop,l) REG(ebx)
OP(mov,l) TW(REG(edx),REG(eax))
OP(mov,l) TW(REG(ebx),REG(ecx))
OP(shr,l) TW(IMM(DATUM_LENGTH),REG(eax))
OP(shr,l) TW(IMM(DATUM_LENGTH),REG(ecx))
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
je asm_generic_divide_fix
OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
jne asm_generic_divide_fail
OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl))
je asm_generic_divide_flo_flo
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(cl))
jne asm_generic_divide_fail
OP(mov,l) TW(REG(ebx),REG(ecx))
OP(shl,l) TW(IMM(TC_LENGTH),REG(ecx))
je asm_generic_divide_fail
OP(and,l) TW(rmask,REG(edx))
OP(sar,l) TW(IMM(TC_LENGTH),REG(ecx))
OP(fld,l) DOF(4,REG(edx)) # fldd
OP(mov,l) TW(REG(ecx),IND(rfree))
OP(fidiv,l) IND(rfree)
jmp asm_generic_flonum_result
asm_generic_divide_fix:
OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl))
jne asm_generic_divide_fail
OP(mov,l) TW(REG(edx),REG(ecx))
OP(shl,l) TW(IMM(TC_LENGTH),REG(ecx))
je asm_generic_divide_fail
OP(and,l) TW(rmask,REG(ebx))
OP(sar,l) TW(IMM(TC_LENGTH),REG(ecx))
OP(fld,l) DOF(4,REG(ebx)) # fldd
OP(mov,l) TW(REG(ecx),IND(rfree))
OP(fidivr,l) IND(rfree)
jmp asm_generic_flonum_result
asm_generic_divide_flo_flo:
OP(mov,l) TW(REG(ebx),REG(ecx))
OP(and,l) TW(rmask,REG(ecx))
OP(fld,l) DOF(4,REG(ecx)) # fldd
ftst
fstsw REG(ax)
sahf
je asm_generic_divide_by_zero
OP(and,l) TW(rmask,REG(edx))
OP(fdivr,l) DOF(4,REG(edx))
jmp asm_generic_flonum_result
asm_generic_divide_by_zero:
fstp ST(0) # Pop second arg
asm_generic_divide_fail:
OP(push,l) REG(ebx)
OP(push,l) REG(edx)
OP(mov,b) TW(IMM(HEX(23)),REG(al))
jmp scheme_to_interface')
define(define_binary_predicate,
`declare_alignment(2)
define_hook_label(generic_$1)
OP(pop,l) REG(edx)
OP(pop,l) REG(ebx)
OP(mov,l) TW(REG(edx),REG(eax))
OP(mov,l) TW(REG(ebx),REG(ecx))
OP(shr,l) TW(IMM(DATUM_LENGTH),REG(eax))
OP(shr,l) TW(IMM(DATUM_LENGTH),REG(ecx))
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
je asm_generic_$1_fix
OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
jne asm_generic_$1_fail
OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl))
je asm_generic_$1_flo_flo
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(cl))
jne asm_generic_$1_fail
OP(shl,l) TW(IMM(TC_LENGTH),REG(ebx))
OP(and,l) TW(rmask,REG(edx))
OP(sar,l) TW(IMM(TC_LENGTH),REG(ebx))
OP(fld,l) DOF(4,REG(edx)) # fldd
OP(mov,l) TW(REG(ebx),IND(rfree))
OP(ficomp,l) IND(rfree)
fstsw REG(ax)
sahf
$5 asm_generic_return_sharp_t
jmp asm_generic_return_sharp_f
asm_generic_$1_fix:
OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl))
je asm_generic_$1_fix_flo
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(cl))
jne asm_generic_$1_fail
OP(shl,l) TW(IMM(TC_LENGTH),REG(edx))
OP(shl,l) TW(IMM(TC_LENGTH),REG(ebx))
OP(cmp,l) TW(REG(ebx),REG(edx))
$3 asm_generic_return_sharp_t
jmp asm_generic_return_sharp_f
asm_generic_$1_flo_flo:
OP(and,l) TW(rmask,REG(edx))
OP(and,l) TW(rmask,REG(ebx))
OP(fld,l) DOF(4,REG(edx)) # fldd
OP(fcomp,l) DOF(4,REG(ebx))
fstsw REG(ax)
sahf
$6 asm_generic_return_sharp_t
jmp asm_generic_return_sharp_f
asm_generic_$1_fix_flo:
OP(shl,l) TW(IMM(TC_LENGTH),REG(edx))
OP(and,l) TW(rmask,REG(ebx))
OP(sar,l) TW(IMM(TC_LENGTH),REG(edx))
OP(mov,l) TW(REG(edx),IND(rfree))
OP(fild,l) IND(rfree)
OP(fcomp,l) DOF(4,REG(ebx))
fstsw REG(ax)
sahf
$4 asm_generic_return_sharp_t
jmp asm_generic_return_sharp_f
asm_generic_$1_fail:
OP(push,l) REG(ebx)
OP(push,l) REG(edx)
OP(mov,b) TW(IMM(HEX($2)),REG(al))
jmp scheme_to_interface')
IF387(`define_unary_operation(decrement,22,sub,fsubr)
define_unary_operation(increment,26,add,fadd)
define_unary_predicate(negative,2a,jl,jb)
define_unary_predicate(positive,2c,jg,ja)
define_unary_predicate(zero,2d,je,je)
# define_binary_operation(name,index,fix*fix,fix*flo,flo*fix,flo*flo)
# define_binary_operation( $1, $2, $3, $4, $5, $6)
define_binary_operation(add,2b,add,fiadd,fiadd,fadd)
define_binary_operation(subtract,28,sub,fisubr,fisub,fsub)
define_binary_operation(multiply,29,imul,fimul,fimul,fmul)
# Divide needs to check for 0, so we cant really use the following
# define_binary_operation(divide,23,NONE,fidivr,fidiv,fdiv)
# define_binary_predicate(name,index,fix*fix,fix*flo,flo*fix,flo*flo)
define_binary_predicate(equal,24,je,je,je,je)
define_binary_predicate(greater,25,jg,ja,ja,ja)
define_binary_predicate(less,27,jl,jb,jb,jb)')
IFN387(`define_jump_indirection(generic_decrement,22)
define_jump_indirection(generic_divide,23)
define_jump_indirection(generic_equal,24)
define_jump_indirection(generic_greater,25)
define_jump_indirection(generic_increment,26)
define_jump_indirection(generic_less,27)
define_jump_indirection(generic_subtract,28)
define_jump_indirection(generic_multiply,29)
define_jump_indirection(generic_negative,2a)
define_jump_indirection(generic_add,2b)
define_jump_indirection(generic_positive,2c)
define_jump_indirection(generic_zero,2d)')
# These don't currently differ according to whether there
# is a 387 or not.
define_jump_indirection(generic_quotient,37)
define_jump_indirection(generic_remainder,38)
define_jump_indirection(generic_modulo,39)
define_jump_indirection(nofp_decrement,22)
define_jump_indirection(nofp_divide,23)
define_jump_indirection(nofp_equal,24)
define_jump_indirection(nofp_greater,25)
define_jump_indirection(nofp_increment,26)
define_jump_indirection(nofp_less,27)
define_jump_indirection(nofp_subtract,28)
define_jump_indirection(nofp_multiply,29)
define_jump_indirection(nofp_negative,2a)
define_jump_indirection(nofp_add,2b)
define_jump_indirection(nofp_positive,2c)
define_jump_indirection(nofp_zero,2d)
define_jump_indirection(nofp_quotient,37)
define_jump_indirection(nofp_remainder,38)
define_jump_indirection(nofp_modulo,39)
IFDASM(`end')
### Edwin Variables:
### comment-column: 56
### End:
syntax highlighted by Code2HTML, v. 0.9.1