/* Netpipe module for mpi-2 one-sided communications by Adam Oline */
#define USE_VOLATILE_RPTR
#include "netpipe.h"
#include <mpi.h>
/* File-scope MPI window handle: filled in by MPI_Win_create() in
 * AfterAlignmentInit() and used by the fence, put and get calls in
 * Sync(), SendData() and RecvData(). */
MPI_Win win;
/*
 * Init - set the protocol defaults and start MPI.
 *
 * p      argument block; prot.use_get and prot.no_fence are cleared
 * pargc  address of the argument count, handed to MPI_Init
 * pargv  address of the argument vector, handed to MPI_Init
 */
void Init(ArgStruct *p, int* pargc, char*** pargv)
{
    /* Defaults: complete transfers with a fence, and move data with put
     * rather than get. */
    p->prot.no_fence = 0;
    p->prot.use_get = 0;

    MPI_Init(pargc, pargv);
}
/*
 * Setup - look up this process's rank, print its host name, and assign the
 * transmitter/receiver roles.
 *
 * p  argument block; prot.iproc, tr, rcv and (for the two end ranks)
 *    prot.nbor are written.
 *
 * Exits with status -2 when fewer than two processes are running.
 * Rank 0 becomes the transmitter and is paired with the last rank, which
 * becomes the receiver. Any rank in between gets neither role and its
 * prot.nbor is left untouched.
 */
void Setup(ArgStruct *p)
{
    int nprocs;

    MPI_Comm_rank(MPI_COMM_WORLD, &p->prot.iproc);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    if (nprocs < 2) {
        printf("Need at least 2 processes, we have %d\n", nprocs);
        exit(-2);
    }

    {
        char s[255];
        char *ptr;

        /* gethostname() leaves the buffer contents unspecified on failure
         * and need not NUL-terminate a truncated name, so fall back to a
         * placeholder and always terminate before treating s as a string. */
        if (gethostname(s, sizeof s - 1) != 0) {
            strcpy(s, "unknown");
        }
        s[sizeof s - 1] = '\0';

        if (s[0] != '.') {                 /* just print the base name */
            ptr = strchr(s, '.');
            if (ptr != NULL) {
                *ptr = '\0';
            }
        }
        printf("%d: %s\n", p->prot.iproc, s);
        fflush(stdout);
    }

    /* TODO: Finish changing netpipe such that it can run with > 2 procs */
    /* Intended pairing:
     *   0 <--> (nprocs - 1)
     *   1 <--> (nprocs - 2)
     *   ...
     * Only the first pair is set up below.
     */
    p->tr = p->rcv = 0;
    if (p->prot.iproc == 0) {
        p->tr = 1;
        p->prot.nbor = nprocs - 1;
    } else if (p->prot.iproc == nprocs - 1) {
        p->rcv = 1;
        p->prot.nbor = 0;
    }
}
/*
 * Sync - issue a fence (assert value 0) on the module's window.
 *
 * p  unused here; the fence acts on the file-scope window handle.
 */
void Sync(ArgStruct *p)
{
    (void)p;

    MPI_Win_fence(0, win);
}
/*
 * PrepareToReceive - no-op in this module.
 *
 * p  unused.
 */
void PrepareToReceive(ArgStruct *p)
{
    (void)p;
}
/*
 * SendData - move bufflen bytes between p->s_ptr and the neighbor's window.
 *
 * p  argument block. prot.use_get selects MPI_Get instead of MPI_Put;
 *    prot.no_fence, when set, skips the closing fence.
 */
void SendData(ArgStruct *p)
{
    /* When cache effects are being limited (p->cache is zero) the target
     * displacement is the distance of s_ptr from the start of the memory
     * pool; otherwise it is 0. */
    int buf_offset = p->cache ? 0 : p->s_ptr - p->s_buff;

    if (p->prot.use_get) {
        MPI_Get(p->s_ptr, p->bufflen, MPI_BYTE, p->prot.nbor, buf_offset,
                p->bufflen, MPI_BYTE, win);
    } else {
        MPI_Put(p->s_ptr, p->bufflen, MPI_BYTE, p->prot.nbor, buf_offset,
                p->bufflen, MPI_BYTE, win);
    }

    if (!p->prot.no_fence) {
        MPI_Win_fence(0, win);
    }
}
/*
 * RecvData - wait until the incoming transfer has completed.
 *
 * p  argument block. With prot.no_fence clear this is a single fence on the
 *    window. With prot.no_fence set it polls the last byte of the receive
 *    buffer instead.
 *
 * The 'no fence' path tries to bypass the fence call by waiting for the
 * last byte to arrive. The MPI-2 standard does not require any data to be
 * written locally until a synchronization call (such as fence) occurs, so
 * this may hang, depending on the MPI-2 implementation. Currently works
 * with MP_Lite.
 */
void RecvData(ArgStruct *p)
{
    if (!p->prot.no_fence) {
        MPI_Win_fence(0, win);
        return;
    }

    /* The awaited character depends on the mode: in no-cache mode we always
     * wait for a 'b'; in cache mode it depends on whether we are the
     * transmitter or the receiver. The same character cannot be used all
     * the time in cache mode due to timing issues, and the small amount of
     * complexity here avoids more complexity in the no-cache code
     * elsewhere. */
    while (p->r_ptr[p->bufflen - 1] != 'a' + (p->cache ? 1 - p->tr : 1)) {
        /* Since r_ptr was made volatile, a function call is not strictly
         * needed here to encourage the compiler to reload it. */
        sched_yield();
    }

    /* Overwrite the marker byte now that it has been seen. */
    p->r_ptr[p->bufflen - 1] = 'a' + (p->cache ? p->tr : 0);
}
/*
 * SendTime - send one double to the neighbor process.
 *
 * p  argument block; prot.nbor is the destination rank
 * t  address of the value to send
 */
void SendTime(ArgStruct *p, double *t)
{
    const int count = 1;
    const int tag = 2;

    MPI_Send(t, count, MPI_DOUBLE, p->prot.nbor, tag, MPI_COMM_WORLD);
}
/*
 * RecvTime - receive one double from the neighbor process.
 *
 * p  argument block; prot.nbor is the source rank
 * t  address where the received value is stored
 */
void RecvTime(ArgStruct *p, double *t)
{
    const int count = 1;
    const int tag = 2;
    MPI_Status status;   /* filled in by MPI_Recv, not examined */

    MPI_Recv(t, count, MPI_DOUBLE, p->prot.nbor, tag, MPI_COMM_WORLD, &status);
}
/*
 * SendRepeat - send one int to the neighbor process.
 *
 * p    argument block; prot.nbor is the destination rank
 * rpt  value to send (sent from the local copy of the parameter)
 */
void SendRepeat(ArgStruct *p, int rpt)
{
    const int count = 1;
    const int tag = 2;

    MPI_Send(&rpt, count, MPI_INT, p->prot.nbor, tag, MPI_COMM_WORLD);
}
/*
 * RecvRepeat - receive one int from the neighbor process.
 *
 * p    argument block; prot.nbor is the source rank
 * rpt  address where the received value is stored
 */
void RecvRepeat(ArgStruct *p, int *rpt)
{
    const int count = 1;
    const int tag = 2;
    MPI_Status status;   /* filled in by MPI_Recv, not examined */

    MPI_Recv(rpt, count, MPI_INT, p->prot.nbor, tag, MPI_COMM_WORLD, &status);
}
/*
 * CleanUp - shut MPI down.
 *
 * p  unused.
 */
void CleanUp(ArgStruct *p)
{
    (void)p;

    MPI_Finalize();
}
/*
 * Reset - no-op in this module.
 *
 * p  unused.
 */
void Reset(ArgStruct *p)
{
    (void)p;
}
/*
 * AfterAlignmentInit - create the MPI window over the receive buffer.
 *
 * p  argument block; the window exposes p->bufflen bytes starting at
 *    p->r_buff, with a displacement unit of 1 byte, on MPI_COMM_WORLD.
 *
 * Runs after the buffers have been allocated and aligned. The resulting
 * handle is stored in the file-scope variable 'win'.
 *
 * NOTE(review): no MPI_Win_free is visible in this file; if this hook runs
 * more than once the previous window handle is simply overwritten - confirm
 * against the caller.
 */
void AfterAlignmentInit(ArgStruct *p)
{
    /* MPI_Info is an opaque handle, so "no info" must be spelled
     * MPI_INFO_NULL; a plain NULL is not a valid info handle. */
    MPI_Win_create(p->r_buff, p->bufflen, 1, MPI_INFO_NULL, MPI_COMM_WORLD,
                   &win);
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */