dnl PowerPC-32 mpn_add_n -- add limb vectors.
dnl Copyright 2002, 2005 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 3 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
C 603e: ?
C 604e: 3.25
C 75x (G3): 3.5
C 7400,7410 (G4): 3.5
C 744x,745x (G4+): 4.25
C power4/ppc970: 2.0
C power5: 2.5
C INPUT PARAMETERS
C rp r3
C s1p r4
C s2p r5
C n r6
C cy r7
ASM_START()
PROLOGUE(mpn_add_nc)
addic r0,r7,-1 C set hw cy from cy argument
cmpwi cr0,r6,15 C more than 15 limbs?
ble L(com) C branch if <= 15 limbs
b L(BIG)
EPILOGUE(mpn_add_nc)
PROLOGUE(mpn_add_n)
addic r0,r0,0 C clear hw cy
cmpwi cr0,r6,15 C more than 15 limbs?
bgt L(BIG) C branch if > 15 limbs
L(com): mtctr r6 C copy size into CTR
addi r3,r3,-4 C offset rp, it's updated before it's used
lwz r0,0(r4) C load s1 limb
lwz r7,0(r5) C load s2 limb
adde r10,r7,r0
bdz L(endS)
L(loopS):
lwzu r0,4(r4) C load s1 limb
lwzu r7,4(r5) C load s2 limb
stwu r10,4(r3) C store result limb
adde r10,r7,r0
bdnz L(loopS)
L(endS):
stwu r10,4(r3) C store result limb
li r3,0
addze r3,r3
blr
L(BIG):
stmw r30,-8(r1) C should avoid this for small sizes!
andi. r12,r6,3
mtctr r12 C copy size into CTR
addi r4,r4,-4
addi r5,r5,-4
addi r3,r3,-4
beq L(multiple_of_4)
lwzu r0,4(r4) C load s1 limb
lwzu r7,4(r5) C load s2 limb
adde r10,r7,r0
bdz L(end0)
L(loop0):
lwzu r0,4(r4) C load s1 limb
lwzu r7,4(r5) C load s2 limb
stwu r10,4(r3) C store result limb
adde r10,r7,r0
bdnz L(loop0)
L(end0):
stwu r10,4(r3) C store result limb
L(multiple_of_4):
srwi r6,r6,2
mtctr r6 C copy size into CTR
lwz r0,4(r4) C load s1 limb
lwz r7,4(r5) C load s2 limb
lwz r8,8(r4) C load s1 limb
lwz r9,8(r5) C load s2 limb
lwz r10,12(r4) C load s1 limb
lwz r11,12(r5) C load s2 limb
lwzu r12,16(r4) C load s1 limb
adde r31,r7,r0 C add limbs with cy, set cy
lwzu r6,16(r5) C load s2 limb
bdz L(enda)
L(loop):
lwz r0,4(r4) C load s1 limb
adde r30,r9,r8 C add limbs with cy, set cy
lwz r7,4(r5) C load s2 limb
stw r31,4(r3) C store result limb
lwz r8,8(r4) C load s1 limb
adde r31,r11,r10 C add limbs with cy, set cy
lwz r9,8(r5) C load s2 limb
stw r30,8(r3) C store result limb
lwz r10,12(r4) C load s1 limb
adde r30,r6,r12 C add limbs with cy, set cy
lwz r11,12(r5) C load s2 limb
stw r31,12(r3) C store result limb
lwzu r12,16(r4) C load s1 limb
adde r31,r7,r0 C add limbs with cy, set cy
stwu r30,16(r3) C store result limb
lwzu r6,16(r5) C load s2 limb
bdnz L(loop) C decrement CTR and loop back
L(enda):
adde r30,r9,r8 C add limbs with cy, set cy
stw r31,4(r3) C store result limb
adde r31,r11,r10 C add limbs with cy, set cy
stw r30,8(r3) C store result limb
adde r30,r6,r12 C add limbs with cy, set cy
stw r31,12(r3) C store result limb
stw r30,16(r3) C store result limb
L(end):
li r3,0
addze r3,r3
lmw r30,-8(r1)
blr
EPILOGUE(mpn_add_n)
syntax highlighted by Code2HTML, v. 0.9.1