/* This software was developed by Bruce Hendrickson and Robert Leland   *
 * at Sandia National Laboratories under US Department of Energy        *
 * contract DE-AC04-76DP00789 and is copyrighted by Sandia Corporation. */

#include <stdio.h>
#include "params.h"
#include "defs.h"
#include "structs.h"


void      balance(graph, nvtxs, nedges, using_vwgts, using_ewgts, vwsqrt,
		            igeom, coords, assignment, goal,
		            architecture, ndims_tot, mesh_dims,
		            global_method, local_method, rqi_flag, vmax, ndims,
		            eigtol, hops)
struct vtx_data **graph;	/* data structure for graph */
int       nvtxs;		/* number of vertices in full graph */
int       nedges;		/* number of edges in graph */
int       using_vwgts;		/* are vertex weights being used? */
int       using_ewgts;		/* are edge weights being used? */
double   *vwsqrt;		/* sqrt of vertex weights (length nvtxs+1) */
int       igeom;		/* geometric dimension for inertial method */
float   **coords;		/* coordinates for inertial method */
short    *assignment;		/* set number of each vtx (length n) */
double   *goal;			/* desired set sizes */
int       architecture;		/* 0=> hypercube, d=> d-dimensional mesh */
int       ndims_tot;		/* number of cuts to make in total */
int      *mesh_dims;		/* shape of mesh */
int       global_method;	/* global partitioning algorithm */
int       local_method;		/* local partitioning algorithm */
int       rqi_flag;		/* should I use multilevel eigensolver? */
int       vmax;			/* if so, how many vertices to coarsen down to? */
int       ndims;		/* number of eigenvectors (2^d sets) */
double    eigtol;		/* tolerance on eigenvectors */
short     (*hops)[MAXSETS];	/* between-set hop cost for KL */
{
    extern int TERM_PROP;	/* invoking terminal propogation? */
    extern int DEBUG_TRACE;	/* trace the execution of the code */
    extern int MATCH_TYPE;	/* type of matching to use when coarsening */
    struct vtx_data **subgraph;	/* data structure for subgraph */
    struct set_info *set_info;	/* information about each processor subset */
    struct set_info **set_buckets;	/* buckets for sorting processor sets */
    struct set_info *set;	/* current processor set information */
    short     hops_special[MAXSETS][MAXSETS];	/* hop mtx for nonstandard cases */
    float    *term_wgts;	/* net pull of terminal propogation */
    float    *save_term_wgts;	/* saved location of term_wgts */
    float    *all_term_wgts[MAXSETS];	/* net pull on all sets */
    int      *loc2glob;		/* mapping from subgraph to graph numbering */
    int      *glob2loc;		/* mapping from graph to subgraph numbering */
    int      *setlists;		/* space for linked lists of vertices in sets */
    int      *list_ptrs;	/* headers of each linked list */
    short    *degree;		/* degrees of graph vertices from a subgraph */
    short     subsets[MAXSETS];	/* subsets being created at current step */
    int       setsize[MAXSETS];	/* sizes of sets created by division */
    double    merged_goal[MAXSETS];	/* sizes of sets at this partition level */
    double    sub_vwgt_sum;	/* sum of subgraph vertex weights */
    int       cut_dirs[MAXDIMS];/* directions of processor cuts if mesh */
    int       hops_flag;	/* use normal or special hops? */
    int       ndims_real;	/* actual dimension of partitioning */
    int       nsets_real;	/* actual # subsets to create */
    int       maxsize;		/* size of largest subgraph */
    int       nsets_tot;	/* total sets to divide subgraph into */
    int       subnvtxs;		/* number of vertices in subgraph */
    int       subnedges;	/* number of edgess in subgraph */
    double   *subvwsqrt;	/* vwsqrt array for subgraph */
    short    *subassign;	/* set assignments for subgraph */
    float   **subcoords;	/* coordinates for subgraph */
    int       striping;		/* partition with parallel cuts? */
    int       pass;		/* counts passes through loop */
    int       max_proc_size;	/* size of largest processor set */
    int       old_max_proc_size;/* previous size of largest processor set */
    int       new_dir;		/* new cut direction? => no terminal prop */
    int       nsets;		/* typical number of sets at each cut */
    int       done_dir[3];	/* mesh directions already cut */
    int       i, j;		/* loop counter */
    double   *smalloc();
    int       sfree();
    int       make_maps(), divide_procs();
    void      merge_goals();
    void      divide(), make_subgraph(), make_term_props();
    void      make_subvector(), make_subgeom(), remake_graph();
    void      merge_assignments(), make_setlists();

    if (DEBUG_TRACE > 0) {
	printf("<Entering balance>\n");
    }

    if (global_method != 7) {	/* Not read from file. */
	for (i = 1; i <= nvtxs; i++)
	    assignment[i] = 0;
    }

    if (nvtxs <= 1)
	return;

    /* Compute some simple parameters. */
    save_term_wgts = NULL;
    maxsize = 0;
    nsets = 1 << ndims;
    subsets[2] = 2;		/* Needed for vertex separators */

    if (architecture > 0)
	nsets_tot = mesh_dims[0] * mesh_dims[1] * mesh_dims[2];
    else if (architecture == 0)
	nsets_tot = 1 << ndims_tot;

    /* Construct data structures for keeping track of processor sets. */
    set_buckets = (struct set_info **)
       smalloc((unsigned) (nsets_tot + 1) * sizeof(struct set_info));
    set_info = (struct set_info *)
       smalloc((unsigned) nsets_tot * sizeof(struct set_info));
    for (i = 0; i < nsets_tot; i++) {
	set_info[i].setnum = (short) i;
	set_info[i].ndims = -1;
	set_info[i].span[0] = -1;
	set_info[i].next = NULL;
	set_buckets[i + 1] = NULL;
    }
    set_buckets[nsets_tot] = &(set_info[0]);

    if (architecture > 0) {
	set_info[0].low[0] = set_info[0].low[1] = set_info[0].low[2] = 0;
	set_info[0].span[0] = mesh_dims[0];
	set_info[0].span[1] = mesh_dims[1];
	set_info[0].span[2] = mesh_dims[2];
    }
    else if (architecture == 0) {
	set_info[0].ndims = (short) ndims_tot;
    }

    done_dir[0] = done_dir[1] = done_dir[2] = FALSE;
    pass = 0;
    loc2glob = NULL;
    degree = NULL;
    glob2loc = NULL;
    setlists = NULL;
    list_ptrs = NULL;

    old_max_proc_size = 2 * nsets_tot;
    max_proc_size = nsets_tot;

    while (max_proc_size > 1) {	/* Some set still needs to be divided. */

	pass++;
	set = set_buckets[max_proc_size];
	set_buckets[max_proc_size] = set->next;

	/* Divide the processors. */
	hops_flag = divide_procs(architecture, ndims, ndims_tot, set_info, set,
		subsets, (global_method == 3), &ndims_real, &nsets_real, &striping,
				 cut_dirs, mesh_dims, hops_special);

	/* If new cut direction, turn off terminal propagation. */
	new_dir = FALSE;
	if (architecture == 0) {
	    if (old_max_proc_size != max_proc_size)
		new_dir = TRUE;
	}
	else if (architecture > 0) {
	    if (!done_dir[cut_dirs[0]])
		new_dir = TRUE;
	    done_dir[cut_dirs[0]] = TRUE;
	}
	old_max_proc_size = max_proc_size;

	/* Now place the new sets into the set_info data structure. */
	/* This is indexed by number of processors in set. */
	for (i = 0; i < nsets_real; i++) {
	    if (architecture > 0) {
		j = set_info[subsets[i]].span[0] *
		   set_info[subsets[i]].span[1] * set_info[subsets[i]].span[2];
	    }
	    else if (architecture == 0) {
		j = 1 << set_info[subsets[i]].ndims;
	    }
	    set_info[subsets[i]].next = set_buckets[j];
	    set_buckets[j] = &(set_info[subsets[i]]);
	}

	/* Construct desired set sizes for this division step. */
	if (pass == 1) {	/* First partition. */
	    subgraph = graph;
	    subnvtxs = nvtxs;
	    subnedges = nedges;
	    subvwsqrt = vwsqrt;
	    subcoords = coords;
	    subassign = assignment;
	    all_term_wgts[1] = NULL;

	    if (!using_vwgts)
		sub_vwgt_sum = subnvtxs;
	    else {
		sub_vwgt_sum = 0;
		for (i = 1; i <= subnvtxs; i++)
		    sub_vwgt_sum += subgraph[i]->vwgt;
	    }
	    merge_goals(goal, merged_goal, set_info, subsets, nsets_real,
			ndims_tot, architecture, mesh_dims, sub_vwgt_sum);
	}

	else {			/* Not the first cut. */

	    /* After first cut, allocate all space we'll need. */
	    if (pass == 2) {
		glob2loc = (int *) smalloc((unsigned) (nvtxs + 1) * sizeof(int));
		loc2glob = (int *) smalloc((unsigned) (maxsize + 1) * sizeof(int));
		if (!using_vwgts) {
		    subvwsqrt = NULL;
		}
		else {
		    subvwsqrt = (double *)
			smalloc((unsigned) (maxsize + 1) * sizeof(double));
		}

		if (graph != NULL) {
		    subgraph = (struct vtx_data **)
			smalloc((unsigned) (maxsize + 1) * sizeof(struct vtx_data *));
		    degree = (short *) smalloc((unsigned) (maxsize + 1) * sizeof(short));
		}
		else {
		    subgraph = NULL;
		}
		subassign = (short *) smalloc((unsigned) (maxsize + 1) * sizeof(short));
	        if (global_method == 3 ||
                    (MATCH_TYPE == 5 && (global_method == 1 ||
		                         (global_method == 2 && rqi_flag)))) {
		    subcoords = (float **) smalloc((unsigned) 3 * sizeof(float *));
		    subcoords[1] = subcoords[2] = NULL;
		    subcoords[0] = (float *) smalloc((unsigned) (maxsize + 1) * sizeof(float));
		    if (igeom > 1)
			subcoords[1] = (float *) smalloc((unsigned) (maxsize + 1) * sizeof(float));
		    if (igeom > 2)
			subcoords[2] = (float *) smalloc((unsigned) (maxsize + 1) * sizeof(float));
		}
		else
		    subcoords = NULL;
		if (TERM_PROP && graph != NULL) {
		    term_wgts = (float *) smalloc((unsigned)
				      (nsets - 1) * (maxsize + 1) * sizeof(float));
		    save_term_wgts = term_wgts;
		    for (i = 1; i < nsets; i++) {
			all_term_wgts[i] = term_wgts;
			term_wgts += maxsize + 1;
		    }
		}
	    }

	    /* Construct mappings between local and global vertex numbering */
	    subnvtxs = make_maps(setlists, list_ptrs, set->setnum, glob2loc,
				 loc2glob);

	    if (TERM_PROP && !new_dir && graph != NULL) {
		all_term_wgts[1] = save_term_wgts;
		make_term_props(graph, subnvtxs, loc2glob, assignment,
				architecture, ndims_tot, ndims_real, set_info,
				set->setnum, nsets_real, nsets_tot, subsets,
				all_term_wgts, using_ewgts);
	    }
	    else
		all_term_wgts[1] = NULL;

	    /* Form the subgraph in our graph format. */
	    if (graph != NULL) {
		make_subgraph(graph, subgraph, subnvtxs, &subnedges, assignment,
			      set->setnum, glob2loc, loc2glob, degree, using_ewgts);
	    }
	    else
		subnedges = 0;	/* Otherwise some output is garbage */

	    if (!using_vwgts)
		sub_vwgt_sum = subnvtxs;
	    else {
		sub_vwgt_sum = 0;
		for (i = 1; i <= subnvtxs; i++)
		    sub_vwgt_sum += subgraph[i]->vwgt;
	    }
	    merge_goals(goal, merged_goal, set_info, subsets, nsets_real,
			ndims_tot, architecture, mesh_dims, sub_vwgt_sum);

	    /* Condense the relevant vertex weight array. */
	    if (using_vwgts && vwsqrt != NULL)
		make_subvector(vwsqrt, subvwsqrt, subnvtxs, loc2glob);

	    if (global_method == 3 ||
                (MATCH_TYPE == 5 && (global_method == 1 ||
		 (global_method == 2 && rqi_flag)))) {
		make_subgeom(igeom, coords, subcoords, subnvtxs, loc2glob);
	    }
	}

	if (DEBUG_TRACE > 1) {
	    printf("About to call divide with nvtxs = %d, nedges = %d, ",
		   subnvtxs, subnedges);
	    if (!architecture)
		printf("ndims = %d\n", set->ndims);
	    else if (architecture == 1)
		printf("mesh = %d\n", set->span[0]);
	    else if (architecture == 2)
		printf("mesh = %dx%d\n", set->span[0],
		       set->span[1]);
	    else if (architecture == 3)
		printf("mesh = %dx%dx%d\n", set->span[0],
		       set->span[1], set->span[2]);
	}

	/* Perform a single division step. */
	divide(subgraph, subnvtxs, subnedges, using_vwgts, using_ewgts, subvwsqrt,
	       igeom, subcoords, subassign, merged_goal,
	       architecture, all_term_wgts,
	       global_method, local_method, rqi_flag, vmax, ndims_real,
	       eigtol, (hops_flag ? hops_special : hops),
	       nsets_real, striping);

	/* Undo the subgraph construction. */
	if (pass != 1 && graph != NULL) {
	    remake_graph(subgraph, subnvtxs, loc2glob, degree, using_ewgts);
	}

	/* Prepare for next division */
	while (max_proc_size > 1 && set_buckets[max_proc_size] == NULL)
	    --max_proc_size;

	/* Merge the subgraph partitioning with the graph partitioning. */
	if (pass == 1) {
	    subgraph = NULL;
	    subvwsqrt = NULL;
	    subcoords = NULL;
	    subassign = NULL;

	    /* Find size of largest subgraph for recursing. */
	    if (max_proc_size > 1) {
		for (i = 0; i < nsets; i++)
		    setsize[i] = 0;
		for (i = 1; i <= nvtxs; i++)
		    ++setsize[assignment[i]];
		maxsize = 0;
		for (i = 0; i < nsets; i++) {
		    if (setsize[i] > maxsize)
			maxsize = setsize[i];
		}
	    }

	    for (i = 1; i <= nvtxs; i++) {
		assignment[i] = subsets[assignment[i]];
	    }

	    /* Construct list of vertices in sets for make_maps. */
	    if (max_proc_size > 1) {
		setlists = (int *) smalloc((unsigned) (nvtxs + 1) * sizeof(int));
		list_ptrs = (int *) smalloc((unsigned) nsets_tot * sizeof(int));

		make_setlists(setlists, list_ptrs, nsets_real, subsets, assignment,
			      loc2glob, nvtxs, TRUE);
	    }
	}
	else {
	    /* Construct list of vertices in sets for make_maps. */
	    if (max_proc_size > 1) {
		make_setlists(setlists, list_ptrs, nsets_real, subsets, subassign,
			      loc2glob, subnvtxs, FALSE);
	    }

	    merge_assignments(assignment, subassign, subsets, subnvtxs, loc2glob);
	}

    }

    /* Free everything allocated for subgraphs. */
    sfree((char *) list_ptrs);
    sfree((char *) setlists);
    sfree((char *) save_term_wgts);
    sfree((char *) subassign);
    sfree((char *) loc2glob);
    sfree((char *) glob2loc);
    if (graph != NULL)
	sfree((char *) degree);
    if (subgraph != NULL)
	sfree((char *) subgraph);
    if (subvwsqrt != NULL)
	sfree((char *) subvwsqrt);
    if (subcoords != NULL) {
	if (subcoords[0] != NULL)
	    sfree((char *) subcoords[0]);
	if (subcoords[1] != NULL)
	    sfree((char *) subcoords[1]);
	if (subcoords[2] != NULL)
	    sfree((char *) subcoords[2]);
	sfree((char *) subcoords);
    }
    sfree((char *) set_info);
    sfree((char *) set_buckets);
}


syntax highlighted by Code2HTML, v. 0.9.1