dnl  SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
dnl  sum in a third limb vector.

dnl  Copyright 2001 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


include(`../config.m4')

C INPUT PARAMETERS
C The first four names are the incoming arguments, in argument-register order.
C rp:  base address the 32-bit result limbs are stored to (stw).
define(rp,%o0)
C s1p: base address of the first source vector (read with lduw).
define(s1p,%o1)
C s2p: base address of the second source vector (read with lduw).
define(s2p,%o2)
C n:   limb count; used as a down-counter by the code.
define(n,%o3)
C cy is not an input.  It is a scratch global register that carries the
C carry bit from one limb addition to the next.
define(cy,%g1)

C This code uses 64-bit operations on `o' and `g' registers.  It doesn't
C require that the upper 32 bits of the `o' registers are preserved by the
C operating system, but if they are not preserved, they must be zeroed.  That
C is indeed what happens at least on Solaris 2.5 and 2.6.

C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at
C about 10 cycles/limb from the Ecache.

ASM_START()
C mpn_add_n -- add the n-limb vectors at s1p and s2p, store the n result limbs
C at rp, and return the carry out of the most significant limb in %o0.
C n must be > 0.
C
C Limbs are 32 bits wide (lduw/stw with a 4-byte stride).  Every lduw
C zero-extends its limb into a 64-bit register, so a 64-bit add of two limbs
C plus the incoming carry leaves the 32-bit sum in the low half and the carry
C out (0 or 1) in bit 32.  A srlx by 32 extracts that carry.
C
C Register use:
C   %o4, %o5   limb pair (s1p, s2p) at an even index
C   %g2, %g3   limb pair at the following odd index
C   %g4        64-bit sum being formed
C   cy         carry passed from one limb to the next
C The fitod instructions write %f2, which nothing in this routine reads.
C
C Leaf routine: no register window is opened and every exit returns with
C retl, computing the return value in the delay slot.
PROLOGUE(mpn_add_n)
	lduw	[s1p+0],%o4
C After this subtraction n counts the limbs that are still unloaded once the
C first two limb pairs have been fetched; it is negative when n was 1.
	addcc	n,-2,n
	bl,pn	%icc,L(end1)
C Delay slot (always executed): the s2p[0] load is needed on both paths.  The
C second limb pair is fetched only after the one-limb case has branched away,
C so nothing is read beyond the end of a one-limb operand.
	lduw	[s2p+0],%o5
	lduw	[s1p+4],%g2
	lduw	[s2p+4],%g3
C The loads above leave the condition codes from addcc untouched.
	be,pn	%icc,L(end2)
C Delay slot: clear the carry.  L(end2) recomputes cy itself, so this is only
C needed on the fall-through path into the loop.
	mov	0,cy

	.align	16
C Two limbs per iteration, software pipelined: the pairs loaded earlier are
C summed and stored while the next pairs are loaded.  At the top of every
C iteration n >= 1 is the number of limbs not yet loaded.
C The fitod instructions are presumably fillers that shape instruction
C grouping, with the separator lines marking the groups -- TODO confirm.
L(loop):
	add	%o4,%o5,%g4
	add	rp,8,rp
	lduw	[s1p+8],%o4
	fitod	%f0,%f2
C ---
	add	cy,%g4,%g4
	addcc	n,-1,n
	lduw	[s2p+8],%o5
	fitod	%f0,%f2
C ---
	srlx	%g4,32,cy
	add	s2p,8,s2p
	stw	%g4,[rp-8]
C Taken when the limb just loaded into %o4/%o5 was the last one (odd limb
C count).  The delay slot below performs the same add as the first
C instruction of L(exito), which is why the target is L(exito)+4.
	be,pn	%icc,L(exito)+4
C ---
	add	%g2,%g3,%g4
	addcc	n,-1,n
	lduw	[s1p+12],%g2
	fitod	%f0,%f2
C ---
	add	cy,%g4,%g4
	add	s1p,8,s1p
C s2p was already advanced by 8 in this iteration, so offset 4 here addresses
C the same limb index as [s1p+12] did above.
	lduw	[s2p+4],%g3
	fitod	%f0,%f2
C ---
	srlx	%g4,32,cy
C Uses the condition codes of the second addcc; none of the instructions in
C between modify them.
	bne,pt	%icc,L(loop)
	stw	%g4,[rp-4]
C ---
C Even limb count: two loaded pairs remain, %o4/%o5 first, then %g2/%g3.
L(exite):
	add	%o4,%o5,%g4
	add	cy,%g4,%g4
	srlx	%g4,32,cy
	stw	%g4,[rp+0]
	add	%g2,%g3,%g4
	add	cy,%g4,%g4
	stw	%g4,[rp+4]
	retl
	srlx	%g4,32,%o0

C Odd limb count: two loaded pairs remain, %g2/%g3 first, then %o4/%o5.  rp
C has already been advanced past the %g2/%g3 limb, hence the [rp-4] store.
C The loop enters at the second instruction.
L(exito):
	add	%g2,%g3,%g4
	add	cy,%g4,%g4
	srlx	%g4,32,cy
	stw	%g4,[rp-4]
	add	%o4,%o5,%g4
	add	cy,%g4,%g4
	stw	%g4,[rp+0]
	retl
	srlx	%g4,32,%o0

C n was 1: a single limb, no incoming carry.
L(end1):
	add	%o4,%o5,%g4
	stw	%g4,[rp+0]
	retl
	srlx	%g4,32,%o0

C n was 2: both limb pairs are already loaded and the loop is skipped.
L(end2):
	add	%o4,%o5,%g4
	srlx	%g4,32,cy
	stw	%g4,[rp+0]
	add	%g2,%g3,%g4
	add	cy,%g4,%g4
	stw	%g4,[rp+4]
	retl
	srlx	%g4,32,%o0
EPILOGUE(mpn_add_n)


dnl  syntax highlighted by Code2HTML, v. 0.9.1