#include "pargrid.h"

/*+ main - the main routine for this grid program
           We are running the solver on two different sets of
	   processors simultaneously

     Input Parameters:
     argc, argv - the usual
     argv[1] - the number of processors in the x direction
     argv[2] - the number of processors in the y direction
     argv[3] - the number of points in the x direction on each processor
               do not use less than 3 points
     argv[4] - the number of points in the y direction on each processor
               do not use less than 3 points
     argv[5] - the number of points in the z direction on each processor
               do not use less than 3 points

     To Run, see tools.

     Notes: The grid program solves a linear system associated with
            a 3-D grid distributed across the processors.  The 3-D
            grid is partitioned in two dimensions amongst the
            processors.  The processors on the left and right ends
            of the grid use a 7 pt stencil, those in the middle use
            a 27-pt stencil.  No real problem is being solved, we
            are just demonstrating the use of the linear system solver.

 +*/

int main(int argc, char **argv)
{
	par_grid grid;
	BSprocinfo *procinfo;
	int	work;
	int	orig_np, orig_id;
	int	new_np, new_id;
	int	xpos, ypos;
	int	dims[2], periods[2];
	int	ranks[2];
	int	range[1][3];
	MPI_Comm	temp_comm, grid_comm;
	MPI_Group	orig_group, new_group;

	/* Call BSinit() to initialize BlocklSolve and MPI */
    BSinit(&argc,&argv);

	/* get the number of processors and check it */
	if (argc < 6) {
		SETERRC(ARG_ERR,"Argument list too small\n");
		return 0;
	}
	sscanf(argv[1],"%d",&grid.worker_x);
	sscanf(argv[2],"%d",&grid.worker_y);
	grid.worker_z = 1;
	MPI_Comm_size(MPI_COMM_WORLD,&orig_np);
	MPI_Comm_rank(MPI_COMM_WORLD,&orig_id);
	if (orig_np != grid.worker_x*grid.worker_y*grid.worker_z) {
		SETERRC(ARG_ERR,"Number of processors is not correct\n");
		return 0;
	}
	if ((orig_np % 2) != 0) {
		SETERRC(ARG_ERR,"Number of processors is not divisible by 2\n");
		return 0;
	}
	if ((grid.worker_x % 2) != 0) {
		SETERRC(ARG_ERR,"Number of processors in X direction is not divisible by 2\n");
		return 0;
	}

	/* initialize the two processor sets */
	/* we are going to split only in the x direction */
	/* so the user must give us a number of processors divisible by 2 */
	/* AND the number in the X direction divisible by 2 */
	MPI_Comm_group(MPI_COMM_WORLD,&orig_group);
	if (orig_id < orig_np / 2) {
		range[0][0] = 0;
		range[0][1] = (orig_np / 2) - 1;
		range[0][2] = 1;
	} else {
		range[0][0] = orig_np / 2;
		range[0][1] = orig_np - 1;
		range[0][2] = 1;
	}
	MPI_Group_range_incl(orig_group,1,range,&new_group);
	MPI_Comm_create(MPI_COMM_WORLD,new_group,&temp_comm);
	MPI_Comm_size(temp_comm,&new_id);
	grid.worker_x /= 2;
	dims[0] = grid.worker_x;
	dims[1] = grid.worker_y;
	periods[0] = 1;
	periods[1] = 1;
	MPI_Cart_create(temp_comm,2,dims,periods,0,&grid_comm);
	
	/* set up the context for BlockSolve */
	procinfo = BScreate_ctx(); CHKERRN(0);
	/* tell it that this matrix has no i-nodes or cliques */
	BSctx_set_si(procinfo,FALSE); CHKERRN(0);
	/* tell it to print out some information on the reordering */
	BSctx_set_pr(procinfo,TRUE); CHKERRN(0);
	/* tell it the number of processors and the procset */
	BSctx_set_ps(procinfo,grid_comm); CHKERRN(0);
	MPI_Comm_size(grid_comm,&new_np);
	MPI_Comm_rank(grid_comm,(int *)(&new_id));
	BSctx_set_np(procinfo,new_np); CHKERRN(0);
	BSctx_set_id(procinfo,new_id); CHKERRN(0);
	BSctx_set_scaling(procinfo,TRUE); CHKERRN(0);
	BSctx_set_num_rhs(procinfo,1); CHKERRN(0);
	BSctx_set_pre(procinfo,PRE_ICC); CHKERRN(0);
	grid.icc_storage = TRUE;
	grid.symmetric = TRUE;
	grid.ncomp = 1;
	grid.positive = FALSE;

	if(procinfo->my_id==0) {
		printf("\n");
		printf("************** Blocksolve Example Grid2 *******************\n");
	}

	if (new_np != grid.worker_x*grid.worker_y*grid.worker_z) {
		SETERRC(ARG_ERR,"Number of processors in procset is not correct\n");
		return 0;
	}

	/* read in grid parameters */
	if (PSISROOT(procinfo)) {
		printf("o  Number of workers (x,y,z): %d %d %d\n",
			grid.worker_x,grid.worker_y,grid.worker_z);
	}
	sscanf(argv[3],"%d",&grid.num_x);
	sscanf(argv[4],"%d",&grid.num_y);
	sscanf(argv[5],"%d",&grid.num_z);

	/* local grid size and type */
	/* the end processors are 7-pt stencils, the others are 27-pt */
	MPI_Cart_coords(grid_comm,new_id,2,ranks);
	xpos = ranks[0];
	ypos = ranks[1];
	if ((xpos == grid.worker_x-1) || (xpos == 0)) {
		grid.l_num_x = grid.num_x;
		grid.type = 7;
	} else {
		grid.l_num_x = grid.num_x / 2;
		grid.type = 27;
	}
	grid.l_num_y = grid.num_y;
	grid.l_num_z = grid.num_z;
	grid.num_x = grid.l_num_x;
	GISUM(&grid.num_x,1,&work,procinfo->procset);
	grid.num_y = grid.l_num_y;
	GISUM((&grid.num_y),1,&work,procinfo->procset);
	grid.num_z = grid.l_num_z;
	GISUM((&grid.num_z),1,&work,procinfo->procset);

	if (PSISROOT(procinfo)) {
		printf("o  Local discretizations (x,y,z): %d %d %d\n",
			grid.l_num_x,grid.l_num_y,grid.l_num_z);
	}

	/* call the worker */
	worker(&grid,procinfo); 

	if(procinfo->my_id==0) {
		printf("************ End Blocksolve Example Grid2 *****************\n");
		printf("\n");
	}

	/* print logging if enabled */
	BSprint_log(procinfo); CHKERRN(0);

	/* free the context */
	BSfree_ctx(procinfo); CHKERRN(0);

	/* finalize BlockSolve and MPI */
	BSfinalize();

	exit(0);
}


syntax highlighted by Code2HTML, v. 0.9.1