#include	<stdio.h>
#include	<ctype.h>
#include	<string.h>
#include	<math.h>
#include	<stdlib.h>
#include	<assert.h>

#include	"nab.h"
#include	"errormsg.h"
#include	"memutil.h"
#include	"../cifparse/cifparse.h"

	/* PDB columns, 0-based	*/
#define		PDB_ANUM_COL	7	
#define		PDB_ANAM_COL	12
#define		PDB_RNAM_COL	17
#define		PDB_CNAM_COL	21
#define		PDB_RID_COL	21
#define		PDB_RNUM_COL	22

#define		PDB_XPOS_COL	30
#define		PDB_YPOS_COL	38
#define		PDB_ZPOS_COL	46

#define		PDB_QPOS_COL	54
#define		PDB_RPOS_COL	64

#define		PDB_OPOS_COL	54
#define		PDB_BPOS_COL	60

#define		PDB_XPOS_LEN	8
#define		PDB_YPOS_LEN	8
#define		PDB_ZPOS_LEN	8

#define		PDB_QPOS_LEN	10
#define		PDB_RPOS_LEN	10

#define		PDB_OPOS_LEN	6
#define		PDB_BPOS_LEN	6

#define		PDB_ANUM_LEN	4
#define		PDB_ANAM_LEN	4
#define		PDB_RNAM_LEN	3
#define		PDB_RID_LEN	6
#define		PDB_RID_SIZE	7
#define		PDB_NPOS_COL	8

	/* useful sizes for initial allocs of a_atomname, r_resname	*/
#define	A_NAME_SIZE	8
#define	R_NAME_SIZE	8

#define	D2R	0.01745329251994329576

#define	ATAB_SIZE	1000
static	ATOM_T	atab[ ATAB_SIZE ];
static	int	n_atab;

#define	B_THRESH	1.85
#define	BH_THRESH	1.20

static	RESIDUE_T	res;

MOLECULE_T	*getpdb( char *, char * );
MOLECULE_T	*getcif( char *, char * );
int	putpdb( char *, MOLECULE_T *, char * );
int	putcif( char [], char [], MOLECULE_T * );
int	putbnd( char [], MOLECULE_T * );
int	putdist( char [], MOLECULE_T *, char [], char [] );
static	MOLECULE_T	*fgetpdb( FILE *, char * );
static	MOLECULE_T	*fgetcif( FILE *, char * );
static	int	isnewres( char [], char [], int, int );
static	void	init_atab( void );
static	void	initres( void );
void	NAB_initres( RESIDUE_T *, int );
void	NAB_initatom( ATOM_T *, int );
static	void	makebonds( RESIDUE_T * );
static	REAL_T	dist( ATOM_T *, ATOM_T * );
static	void	fputpdb( FILE *, MOLECULE_T *, char * );
static	void	fputcif( FILE *, char *, MOLECULE_T * );
static	void	mk_brook_rname( char [], RESIDUE_T * );
static	void	mk_brook_aname( char [], char [], char [] );

int     get_mytaskid( void );
void    reducerror( int );
char    *ggets( char *, int, FILE * );
void    upd_molnumbers( MOLECULE_T * );
MOLECULE_T *newmolecule( void );
int	addstrand( MOLECULE_T *, char [] );
int	addresidue( MOLECULE_T *, char [], RESIDUE_T * );
void    select_atoms( MOLECULE_T *, char [] );
int     rt_errormsg_s( int, char[], char[] );


/***********************************************************************
                            GETPDB()
************************************************************************/

/*
 * Get the molecular topology and geometry from a .pdb file.
 * Task 0 opens and closes the file.  All tasks execute fgetpdb
 * but the ggets function reads from the file via task 0, and
 * broadcasts the results to all other tasks.
 */
 
MOLECULE_T *getpdb( char *fname, char *options )
{
  FILE *fp;
  MOLECULE_T *mol;
  int ier;

  if ( !fname || !*fname ) {
    fp = stdin;
  }
  else if ( !strcmp( fname, "-" ) ) {
    fp = stdin;
  }
  else {
    ier = 0;
    if (get_mytaskid() == 0) {
      if( ( fp = fopen( fname, "r" ) ) == NULL ) {
	fprintf( stderr, "getpdb: can't open file %s\n", fname );
	ier = -1;
      }
    }
    reducerror(ier);

    /* The file is open.  Set fp to NULL for all tasks but task 0. */

    if (get_mytaskid() != 0) {
      fp = NULL;
    }
  }

  mol = fgetpdb( fp, options );

  if ( fp != stdin ) {
    if (get_mytaskid() == 0) {
      fclose( fp );
    }
  }

  return( mol );
}

MOLECULE_T *getcif( char *fname, char *datablockname )
{
  FILE	*fp;
  MOLECULE_T *mol;

  if( !fname || !*fname )
    fp = stdin;
  else if( !strcmp( fname, "-" ) )
    fp = stdin;
  else if( ( fp = fopen( fname, "r" ) ) == NULL ){
    fprintf( stderr, "getcif: can't open file %s\n", fname );
    exit( 1 );
  }

  mol = fgetcif( fp, datablockname );

  if( fp != stdin )
    fclose( fp );

  return( mol );
}

/***********************************************************************
                            PUTPDB()
************************************************************************/

/* Put the molecular topology and geometry to a .pdb file via task 0 only. */
 
int putpdb( char *fname, MOLECULE_T *mol, char *options )
{
  FILE *fp;
  int ier;

  if( !mol ){
    if (get_mytaskid() == 0) {
      fprintf( stderr, "putpdb: NULL molecule\n" );
    }
    return( 0 );
  }

  ier = 0;
  if (get_mytaskid() == 0) {
    if( !strcmp( fname, "-" ) )
      fp = stdout;
    else if( ( fp = fopen( fname, "w" ) ) == NULL ){
      rt_errormsg_s( TRUE, E_CANT_OPEN_S, fname );
      ier = -1;
    }
    if (ier >= 0) {
      fputpdb( fp, mol, options );
      if( fp != stdout )
	fclose( fp );
    }
  }
  reducerror(ier);

  return( 0 );
}

int putcif( char fname[], char blockId[], MOLECULE_T *mol )
{
  FILE	*fp;

  if( !mol ){
    fprintf( stderr, "putcif: NULL molecule\n" );
    return( 0 );
  }
  if( !strcmp( fname, "-" ) )
    fp = stdout;
  else if( ( fp = fopen( fname, "w" ) ) == NULL ){
    rt_errormsg_s( TRUE, E_CANT_OPEN_S, fname );
    exit( 1 );
  }
  fputcif( fp, blockId, mol );
  if( fp != stdout )
    fclose( fp );
  return( 0 );
}

int putbnd( char fname[], MOLECULE_T *mol )
{
  FILE	*fp;
  STRAND_T	*sp;
  RESIDUE_T	*res;
  int	a, ta, ai, aj, r, rj, tr;
  int	rval = 0;
  int	*aoff = NULL;
  int	b;
  EXTBOND_T	*ebp;

  if( !mol ){
    fprintf( stderr, "putbnd: NULL molecule\n" );
    return( 0 );
  }
  if( ( fp = fopen( fname, "w" ) ) == NULL ){
    rt_errormsg_s( TRUE, E_CANT_OPEN_S, fname );
    exit( 1 );
  }

  for( tr = 0, sp = mol->m_strands; sp; sp = sp->s_next )
    tr += sp->s_nresidues;

  if( ( aoff = ( int * )malloc( tr * sizeof( int ) ) ) == NULL ){
    rt_errormsg_s( TRUE, E_NOMEM_FOR_S, "aoff array in putbnd" );
    rval = 1;
    goto clean_up;
  }

  for( ta = 0, tr = 0, sp = mol->m_strands; sp; sp = sp->s_next ){
    aoff[ tr ] = ta;
    if( sp->s_nresidues > 0 ){
      ta += sp->s_residues[ 0 ]->r_natoms;
      for( r = 1; r < sp->s_nresidues; r++ ){
	aoff[ tr + r ] = ta;
	ta += sp->s_residues[ r ]->r_natoms;
      }
      tr += sp->s_nresidues;
    }
  }

  for( tr = 0, sp = mol->m_strands; sp; sp = sp->s_next ){
    for( r = 0; r < sp->s_nresidues; r++ ){
      res = sp->s_residues[ r ];
      a = aoff[ r + tr ];
      for( b = 0; b < res->r_nintbonds; b++ ){
	fprintf( fp, "%d %d\n",
		 res->r_intbonds[ b ][ 0 ] + a,
		 res->r_intbonds[ b ][ 1 ] + a );
      }
      for( ebp = res->r_extbonds; ebp; ebp = ebp->eb_next ){
	if( ( rj = ebp->eb_rnum ) < r + 1 )
	  continue;
	ai = aoff[ tr + r ];
	aj = aoff[ tr + rj - 1 ];
	fprintf( fp, "%d %d\n",
		 ebp->eb_anum + ai,
		 ebp->eb_ranum + aj );
      }
    }
    tr += sp->s_nresidues;
  }

  clean_up : ;
  if( aoff )
    free( aoff );
	
  fclose( fp );

  return( rval );
}

int	putdist( char fname[], MOLECULE_T *mol, char aexp1[], char aexp2[] )
{
  FILE		*fp;
  STRAND_T	*sp1;
  RESIDUE_T	*res1, *res2;
  ATOM_T		*ap1, *ap2;
  int		tr, r1, r2, a1, a2;
  RESIDUE_T	**res;
  int		cnt;

  if( !mol ){
    fprintf( stderr, "putdist: NULL molecule\n" );
    return( 0 );
  }

  select_atoms( mol, aexp1 );
  for( sp1 = mol->m_strands; sp1; sp1 = sp1->s_next ){
    sp1->s_attr &= ~AT_SELECTED;
    sp1->s_attr |= ( sp1->s_attr & AT_SELECT ) ? AT_SELECTED : 0;
    for( r1 = 0; r1 < sp1->s_nresidues; r1++ ){
      res1 = sp1->s_residues[ r1 ];
      res1->r_attr &= ~AT_SELECTED;
      res1->r_attr |= ( res1->r_attr & AT_SELECT ) ?
	AT_SELECTED : 0;
      for( a1 = 0, ap1 = res1->r_atoms; a1 < res1->r_natoms;
	   a1++, ap1++ ){
	ap1->a_attr &= ~AT_SELECTED;
	ap1->a_attr |= ( ap1->a_attr & AT_SELECT ) ?
	  AT_SELECTED : 0;
      }
    }
  }
  select_atoms( mol, aexp2 );

  for( tr = 0, sp1 = mol->m_strands; sp1; sp1 = sp1->s_next )
    tr += sp1->s_nresidues;
  if( ( res = ( RESIDUE_T ** )malloc( tr * sizeof( RESIDUE_T * ) ) )
      == NULL )
  {
    fprintf( stderr, "can't allocate res\n" );
    goto clean_up;
  }
  for( r2 = 0, sp1 = mol->m_strands; sp1; sp1 = sp1->s_next ){
    for( r1 = 0; r1 < sp1->s_nresidues; r1++ )
      res[ r2++ ] = sp1->s_residues[ r1 ];
  }

  if( ( fp = fopen( fname, "w" ) ) == NULL ){
    fprintf( stderr, "can't open dist file %s\n", fname );
    goto clean_up;
  }

  for( cnt = 0, r1 = 0; r1 < tr; r1++ ){
    res1 = res[ r1 ];
    for( a1 = 0; a1 < res1->r_natoms; a1++ ){
      ap1 = &res1->r_atoms[ a1 ];
      if( AT_SELECTED & ap1->a_attr ){
	for( r2 = 0; r2 < tr; r2++ ){
	  res2 = res[ r2 ];
	  for( a2 = 0; a2 < res2->r_natoms; a2++ ){
	    ap2 = &res2->r_atoms[ a2 ];
	    if( AT_SELECT & ap2->a_attr ){
	      fprintf( fp, "%3d %-4s %-4s %3d %-4s %4s %8.3f\n",
		       r1 + 1, res1->r_resname, ap1->a_atomname,
		       r2 + 1, res2->r_resname, ap2->a_atomname,
		       dist( ap1, ap2 ) );
	      cnt++;
	    }
	  }
	}
      }
    }
  }

  clean_up : ;
  if( res )
    free( res );

  return( cnt );
}

/***********************************************************************
                            FGETPDB()
************************************************************************/

/* Parse the molecular topology and geometry from a .pdb file via all tasks. */
 
static MOLECULE_T *fgetpdb( FILE *fp, char *options )
{
  MOLECULE_T	*mol;
  ATOM_T		*ap;
  int	l_cid, cid;
  int	nchains;
  char	line[ 82 ];
  char	sname[ 10 ];
  char	aname[ 10 ];
  char	rname[ 10 ], l_rname[ 10 ];
  char	rid[ 10 ], l_rid[ 10 ]; /* cols 22-27 */
  char	field[ 10 ];   /* temp. used to get coords, etc.  */
  char	*np, *np1;
  int	rnum, l_rnum;
  REAL_T	x, y, z, q, r, occ, bfact;
  int		i;
  char	temp;
  char	loptions[ 256 ];

  if( options == NULL )
	*loptions = '\0';
  else
	strcpy( loptions, options );

  initres();
  init_atab();

  mol = newmolecule();

  l_cid = '\0';
  *l_rname = '\0';
  l_rnum = 0;
  for( nchains = 0, n_atab = 0; ; ){

    /*
     * The ggets function performs fgets via task 0 only,
     * and broadcasts the result to all other tasks.
     */

    if( ggets( line, sizeof( line ), fp ) == NULL )
      break;

    /* pad input line out to 80 columns with blanks  */
    for( i=strlen( line ); i<80; i++ ) line[i] = ' ';
    line[ 80 ] = '\0';

    if( strncmp( "ATOM", line, 4 ) == 0 ||
	strncmp( "HETATM", line, 6 ) == 0 ){
      strncpy( aname, &line[ PDB_ANAM_COL ], PDB_ANAM_LEN );
      aname[ PDB_ANAM_LEN ] = '\0';
      /* strip blanks: */
      for( np = np1 = aname; *np; np++ ){
	if( *np != ' ' )
	  *np1++ = *np;
      }
      *np1 = '\0';

      /* rotate initial digit: */
      if( aname[0] == '1' || aname[0] == '2' ||
	  aname[0] == '3' ){
	temp = aname[0];
	for( i=0; i<strlen(aname)-1; i++ )
	  aname[i] = aname[i+1];
	aname[i] = temp;
      }

      /* change Brookhaven nucleic acid atom names to iupuac */
      if( aname[2] == '*' ) aname[2] = '\'';
      if( !strcmp( aname, "C5M" ) ) strcpy( aname, "C7" );
				
      strncpy( rname, &line[ PDB_RNAM_COL ], PDB_RNAM_LEN );
      rname[ PDB_RNAM_LEN ] = '\0';
      for( np = np1 = rname; *np; np++ ){
	if( *np != ' ' )
	  *np1++ = *np;
      }
      *np1 = '\0';

      /* check to see if this residue is RNA, and change the
	 Brookahven name H2'1 to the Amber (and IUPAC?) standard
	 H2'.  If the Brookhaven file doesn't have protons, there
	 is no problem; if it does, they usually follow the heavy
	 atoms, and the following code will also work.  If H2'
	 precedes O2', then the code below will fail, and something
	 will have to be written to pre-scan the entire resiude :-(   */
      if( !strcmp( aname, "O2\'" ) ) res.r_kind = RT_RNA; 
      if( !strcmp( aname, "H2\'1") && res.r_kind == RT_RNA )
	strcpy( aname, "H2\'" );

      /* change Brookhaven nucleic acid residue names to iupuac */
      if( !strcmp( rname, "A" ) ) strcpy( rname, "ADE" );
      if( !strcmp( rname, "C" ) ) strcpy( rname, "CYT" );
      if( !strcmp( rname, "G" ) ) strcpy( rname, "GUA" );
      if( !strcmp( rname, "T" ) ) strcpy( rname, "THY" );
      if( !strcmp( rname, "U" ) ) strcpy( rname, "URA" );

      /* grab the chain, r seq num & ins. # */
      strncpy( rid, &line[ PDB_RID_COL ], PDB_RID_LEN );
      rid[ PDB_RID_LEN ] = '\0';

      cid = line[ PDB_CNAM_COL ];
      sscanf( &line[ PDB_RNUM_COL ], "%d", &rnum );

      strncpy( field, &line[PDB_XPOS_COL], PDB_XPOS_LEN );
      field[ PDB_XPOS_LEN ] = '\0';
      x = atof( field );

      strncpy( field, &line[PDB_YPOS_COL], PDB_YPOS_LEN );
      field[ PDB_YPOS_LEN ] = '\0';
      y = atof( field );

      strncpy( field, &line[PDB_ZPOS_COL], PDB_ZPOS_LEN );
      field[ PDB_ZPOS_LEN ] = '\0';
      z = atof( field );

      if( strstr( loptions, "-pqr") ){

	/*			if we decide to make pqr files fixed format, the following
				code may help:

				strncpy( field, &line[PDB_QPOS_COL], PDB_QPOS_LEN );
				field[ PDB_QPOS_LEN ] = '\0';
				q = atof( field );

				strncpy( field, &line[PDB_RPOS_COL], PDB_RPOS_LEN );
				field[ PDB_RPOS_LEN ] = '\0';
				r = atof( field );

				for now, with free format, use the following:               */

#ifdef DOUBLE_PRECISION
	sscanf( &line[PDB_QPOS_COL], "%lf%lf", &q, &r );
#else
	sscanf( &line[PDB_QPOS_COL], "%f%f", &q, &r );
#endif

	occ = 1.0;
	bfact = 0.0;

      }else{
	/* use Bondi radii and zero charges for defaults: */
	if( !strncmp( aname, "H", 1 ) ) r = 1.2;
	else if( !strncmp( aname, "C", 1 ) ) r = 1.70;
	else if( !strncmp( aname, "N", 1 ) ) r = 1.55;
	else if( !strncmp( aname, "O", 1 ) ) r = 1.50;
	else if( !strncmp( aname, "S", 1 ) ) r = 1.80;
	else if( !strncmp( aname, "P", 1 ) ) r = 1.85;
	else r = 1.7;    /* carbon-like radius for others */
	q = 0.0;

	/* read in occ. and bfactor */
	strncpy( field, &line[PDB_OPOS_COL], PDB_OPOS_LEN );
	field[ PDB_OPOS_LEN ] = '\0';
	occ = atof( field );

	strncpy( field, &line[PDB_BPOS_COL], PDB_BPOS_LEN );
	field[ PDB_BPOS_LEN ] = '\0';
	bfact = atof( field );
      }

      if( cid != l_cid ){
	if( n_atab > 0 ){
	  strcpy( res.r_resname, l_rname );
	  strcpy( res.r_resid, l_rid );
	  res.r_num = l_rnum;
	  res.r_natoms = n_atab;
	  makebonds( &res );
	  addresidue( mol, sname, &res ); initres();
	  n_atab = 0;
	}

	nchains++;
	if( cid == ' ' )
	  sprintf( sname, "%d", nchains );
	else{
	  sname[ 0 ] = cid;
	  sname[ 1 ] = '\0';
	}
	addstrand( mol, sname );

{	/* temp, will do right soon!	*/
	STRAND_T	*sp, *spl;

	for( sp = NULL, spl = mol->m_strands; spl; spl = spl->s_next ){
		if( !strcmp( spl->s_strandname, sname ) ){
			sp = spl;
			break;
		}
	}
	sp->s_res_size = 10000;	/* memory is cheap */
	sp->s_residues =
		(RESIDUE_T **)malloc(sp->s_res_size*sizeof(RESIDUE_T));
	if( sp->s_residues == NULL ){
		/* too big, die, shouldn't happen	*/
	}
}

	l_cid = cid;
	strcpy( l_rname, rname );
	strcpy( l_rid, rid );
	l_rnum = rnum;

      }else if( isnewres( l_rname, rname, l_rnum, rnum ) ){
	strcpy( res.r_resname, l_rname );
	strcpy( res.r_resid, l_rid );
	res.r_num = l_rnum;
	res.r_natoms = n_atab;
	makebonds( &res );
	addresidue( mol, sname, &res ); initres();
	strcpy( l_rname, rname );
	strcpy( l_rid, rid );
	l_rnum = rnum;
	n_atab = 0;
      }
      ap = &atab[ n_atab ];
      NAB_initatom( ap, 0 );
      strcpy( ap->a_atomname, aname );
      ap->a_attr = 0;
      ap->a_residue = NULL;
      ap->a_pos[ 0 ] = x;
      ap->a_pos[ 1 ] = y;
      ap->a_pos[ 2 ] = z;
      ap->a_charge   = q;
      ap->a_radius   = r;
      ap->a_occ      = occ;
      ap->a_bfact    = bfact;
      n_atab++;

    }else if( strncmp( "TER", line, 3 ) == 0 ){ /* new strand */
      if( n_atab > 0 ){
	strcpy( res.r_resname, l_rname );
	strcpy( res.r_resid, l_rid );
	res.r_num = l_rnum;
	res.r_natoms = n_atab;
	makebonds( &res );
	addresidue( mol, sname, &res ); initres();
	n_atab = 0;
      }
      l_cid = '\0';
      *l_rname = '\0';
      *l_rid = '\0';
      l_rnum = 0;
    }else if( strncmp( "END", line, 3 ) == 0 )
      break;
  }
  if( n_atab > 0 ){
    strcpy( res.r_resname, l_rname );
    strcpy( res.r_resid, l_rid );
    res.r_num = l_rnum;
    res.r_natoms = n_atab;
    makebonds( &res );
    addresidue( mol, sname, &res ); initres();
  }

  mol->m_nvalid = 0;

  return( mol );
}

static	MOLECULE_T	*fgetcif( FILE *fp, char *blockId )
{
#ifdef MMCIF

  MOLECULE_T	*mol;
  ATOM_T		*ap;
  char	l_cid = '\0', cid;
  int		nchains;
  char	line[ 100 ];
  char	sname[ 10 ];
  char	aname[ 10 ];
  char	cids[ 10 ];  /* temp. string to hold chain-id  */
  char	rname[ 10 ], l_rname[ 10 ];
  char	rid[ 10 ], l_rid[ 10 ]; /* cols 22-27 */
  char	*np, *np1;
  int		rnum, l_rnum = 0;
  REAL_T	x, y, z;
  int		i, stat, iCat;
  int		col1, col2, col3, col4, col5, col6, col7, col8, col9;
  int		iblock;
  char	temp;

  fprintf( stderr, "in fgetcif\n" );
  *l_rname = '\0';
  NAB_initres( &res );
  init_atab();
  mol = newmolecule();

  /*
  **   Use the cifparse framework to extract information from an mmCIF file:
  */
  ndb_cif_init();
  fprintf( stderr, "back from ndb_cif_init()\n" );

  stat = ndb_cif_read_file(fp);
  fprintf( stderr, "back from ndb_cif_read_file()\n" );

  /*  Check for target data block.  */
  iblock = -1;
  for (i=0; i< cifFiles.numDatablock; i++) {
    if (!strcmp(blockId,cifFiles.datablocks[i].datablockName)) {
      iblock = i;
      break;
    }
  }
  if (iblock == -1 ) {
    fprintf( stderr,"Target data block %s is not in file\n", blockId);
    exit( 1 );
  }
  iCat = get_category_index(iblock,"atom_site");
  if (iCat == -1) {
    fprintf( stderr,"Target category %s is not in data block %d\n",
	     "atom_site",iblock);
    exit( 1 );
  }
  fprintf( stderr,"Read %d rows in category %d\n",
	   cifFiles.datablocks[iblock].categories[iCat].numRow, iCat);

  col1 = get_column_index(iblock,iCat,"id");             /* atom num. */
  col2 = get_column_index(iblock,iCat,"label_atom_id");  /* atom name */
  col3 = get_column_index(iblock,iCat,"label_comp_id");  /* res. name */
  col4 = get_column_index(iblock,iCat,"label_asym_id");  /* chain id  */
  col5 = get_column_index(iblock,iCat,"label_seq_id");   /* abs. res. no.*/
  col6 = get_column_index(iblock,iCat,"Cartn_x");        /* x-coord */
  col7 = get_column_index(iblock,iCat,"Cartn_y");        /* y-coord */
  col8 = get_column_index(iblock,iCat,"Cartn_z");        /* z-coord */
  col9 = get_column_index(iblock,iCat,"auth_seq_id");   /* res. no. */

  /*  --- need some error processing here!  */

  nchains = 0;
  n_atab = 0;

  for (i=0; i < cifFiles.datablocks[iblock].categories[iCat].numRow; i++) {

    strcpy(aname,
	   cifFiles.datablocks[iblock].categories[iCat].rows[i].columns[col2]);

    /* strip blanks: */
    for( np = np1 = aname; *np; np++ ){
      if( *np != ' ' ) *np1++ = *np;
    }
    *np1 = '\0';

    /* rotate initial digit: */
    if( aname[0] == '1' || aname[0] == '2' ||
	aname[0] == '3' ){
      temp = aname[0];
      for( i=0; i<strlen(aname)-1; i++ )
	aname[i] = aname[i+1];
      aname[i] = temp;
    }
				
    strcpy(rname,
	   cifFiles.datablocks[iblock].categories[iCat].rows[i].columns[col3]);

    for( np = np1 = rname; *np; np++ ){
      if( *np != ' ' ) *np1++ = *np;
    }
    *np1 = '\0';

    *rid = '\0';
    /*
      Note: the idea of rid is to contain what was originally in columns
      22-27 of the pdb file; we should be able to reconstruct this,
      but for now, will leave it empty: this will cause putpdb to
      ignore it.
      strcpy(rid,
      cifFiles.datablocks[iblock].categories[iCat].rows[i].columns[col9]);
    */

    /*
      In pdb files, chain id's are single characters, but it is not
      clear that this is enforced in mmCIF files; hence, we'll take
      the first character as our chain-id for NAB purposes.
    */
    strcpy(cids,
	   cifFiles.datablocks[iblock].categories[iCat].rows[i].columns[col4]);
    cid = cids[0];

    rnum = atoi(
		cifFiles.datablocks[iblock].categories[iCat].rows[i].columns[col5]);

    x = atof(
	     cifFiles.datablocks[iblock].categories[iCat].rows[i].columns[col6]);

    y = atof(
	     cifFiles.datablocks[iblock].categories[iCat].rows[i].columns[col7]);

    z = atof(
	     cifFiles.datablocks[iblock].categories[iCat].rows[i].columns[col8]);

    if( cid != l_cid ){
      if( n_atab > 0 ){
	strcpy( res.r_resname, l_rname );
	strcpy( res.r_resid, l_rid );
	res.r_num = l_rnum;
	res.r_natoms = n_atab;
	makebonds( &res );
	addresidue( mol, sname, &res ); NAB_initres( &res );
	n_atab = 0;
      }

      nchains++;
      if( cid == ' ' )
	sprintf( sname, "%d", nchains );
      else{
	sname[ 0 ] = cid;
	sname[ 1 ] = '\0';
      }
      addstrand( mol, sname );

{	/* temp, will do right soon!	*/
	STRAND_T	*sp, *spl;

	for( sp = NULL, spl = mol->m_strands; spl; spl = spl->s_next ){
		if( !strcmp( spl->s_strandname, sname ) ){
			sp = spl;
			break;
		}
	}
	sp->s_res_size = 10000;	/* memory is cheap */
	sp->s_residues =
		(RESIDUE_T **)malloc( sp->s_res_size*sizeof(RESIDUE_T) );
	if( sp->s_residues == NULL ){
		/* too big, die, shouldn't happen	*/
	}
}

      l_cid = cid;
      strcpy( l_rname, rname );
      strcpy( l_rid, rid );
      l_rnum = rnum;

    }else if( isnewres( l_rname, rname, l_rnum, rnum ) ){

      strcpy( res.r_resname, l_rname );
      strcpy( res.r_resid, l_rid );
      res.r_num = l_rnum;
      res.r_natoms = n_atab;
      makebonds( &res );
      addresidue( mol, sname, &res ); NAB_initres( &res );
      strcpy( l_rname, rname );
      strcpy( l_rid, rid );
      l_rnum = rnum;
      n_atab = 0;

    }
    ap = &atab[ n_atab ];
    NAB_initatom( ap, 0 );
    strcpy( ap->a_atomname, aname );
    ap->a_attr = 0;
    ap->a_residue = NULL;
    ap->a_pos[ 0 ] = x;
    ap->a_pos[ 1 ] = y;
    ap->a_pos[ 2 ] = z;
    n_atab++;
  }
  if( n_atab > 0 ){
    strcpy( res.r_resname, l_rname );
    strcpy( res.r_resid, l_rid );
    res.r_num = l_rnum;
    res.r_natoms = n_atab;
    makebonds( &res );
    addresidue( mol, sname, &res ); NAB_initres( &res );
  }

  return( mol );
#else
  fprintf( stderr, "getcif routine not implemnted in this version!\n" );
  fprintf( stderr, "...compile cifparse.a, and NAB with -DMMCIF\n" );
  exit( 1 );
  return( NULL );
#endif
}

static	int	isnewres( char l_rname[], char rname[], int l_rnum, int rnum )
{

  return( strcmp( l_rname, rname ) || l_rnum != rnum );
}

static	void	initres( void )
{

  NAB_initres( &res, 0 );
  if( res.r_resname == NULL ){
    res.r_resname = ( char * )malloc(R_NAME_SIZE*sizeof(char));
    if( res.r_resname == NULL ){
      fprintf( stderr,
	       "initres: can't allocate res->r_resname.\n" );
      exit( 1 );
    }
  }
  if( res.r_resid == NULL ){
    res.r_resid = ( char * )malloc(R_NAME_SIZE*sizeof(char));
    if( res.r_resid == NULL ){
      fprintf( stderr,
	       "initres: can't allocate res->r_resid.\n" );
      exit( 1 );
    }
  }
  res.r_natoms = n_atab;
  res.r_atoms = atab;
}

static	void	init_atab( void )
{
  static	int	init = 1;
  int	a;
  ATOM_T	*ap;

  if( !init )
    return;
  for( ap = atab, a = 0; a < ATAB_SIZE; a++, ap++ ){
    ap->a_atomname = (char *)malloc(A_NAME_SIZE*sizeof(char));
    if( ap->a_atomname == NULL ){
      fprintf( stderr,
	       "init_atab: can't alloc a_atomname.\n" );
      exit( 1 );
    }
    *ap->a_atomname = '\0';
    ap->a_atomtype = NULL;
    ap->a_element = NULL;
    ap->a_fullname = NULL;
  }
  init = 0;
}

static	void	makebonds( RESIDUE_T *res )
{
  int	a1, a2;
  ATOM_T	*ap1, *ap2;
  int	ih1, ih2;
  REAL_T	d;

  for( a1 = 0; a1 < res->r_natoms; a1++ ){
    ap1 = &res->r_atoms[ a1 ];
    ap1->a_nconnect = 0;
  }

  for( a1 = 0; a1 < res->r_natoms - 1; a1++ ){
    ap1 = &res->r_atoms[ a1 ];
    if( isdigit( *ap1->a_atomname ) )
      ih1 = ap1->a_atomname[1]=='H' ||
	ap1->a_atomname[1]=='h';
    else
      ih1 = *ap1->a_atomname == 'H' || *ap1->a_atomname=='h';
    for( a2 = a1 + 1; a2 < res->r_natoms; a2++ ){
      ap2 = &res->r_atoms[ a2 ];
      if( isdigit( *ap2->a_atomname ) )
	ih2 = ap2->a_atomname[1]=='H' ||
	  ap2->a_atomname[1]=='h';
      else
	ih2 = *ap2->a_atomname=='H' ||
	  *ap2->a_atomname=='h';
      d = dist( ap1, ap2 );
      if( ih1 || ih2 ){
	if( d <= BH_THRESH ){
	  if( ap1->a_nconnect < A_CONNECT_SIZE &&
	      ap2->a_nconnect < A_CONNECT_SIZE){
	    ap1->a_connect[ap1->a_nconnect++]=a2;
	    ap2->a_connect[ap2->a_nconnect++]=a1;
	  }
	}
      }else if( d <= B_THRESH ){
	if(ap1->a_nconnect < A_CONNECT_SIZE &&
	   ap2->a_nconnect < A_CONNECT_SIZE ){
	  ap1->a_connect[ap1->a_nconnect++] = a2;
	  ap2->a_connect[ap2->a_nconnect++] = a1;
	}
      }
    }
  }
}

static	REAL_T	dist( ATOM_T *ap1, ATOM_T *ap2 )
{
  REAL_T	dx, dy, dz;

  dx = ap1->a_pos[ 0 ] - ap2->a_pos[ 0 ];
  dy = ap1->a_pos[ 1 ] - ap2->a_pos[ 1 ];
  dz = ap1->a_pos[ 2 ] - ap2->a_pos[ 2 ];
  return( sqrt( dx * dx + dy * dy + dz * dz ) );
}

static	void	fputpdb( FILE *fp, MOLECULE_T *mol, char *options )
     /*
   Options:

      -pqr:  add charges and radii after the xyz coordinates
   -nobocc:  don't add occupancies and b-factors after the xyz coordinates
             (implied if -pqr is present)
    -brook:  use Broohaven atom/residue names, else use whatever is there
       -tr:  use residue numbers that do not restart at each chain
    -nocid:  do not put put chain id's in the output

*/
{
  int	r, tr, rn, a, ta;
  char	cid;
  STRAND_T	*sp;
  RESIDUE_T	*res;
  ATOM_T		*ap;
  char	rname[ R_NAME_SIZE ];
  char	aname[ A_NAME_SIZE ];
  char	rid[ PDB_RID_SIZE ];
  char	loptions[ 256 ];

  if( !mol ){
    fprintf( stderr, "fputpdb: NULL molecule\n" );
    return;
  }
  if( !fp ){
    fprintf( stderr, "fputpdb: NULL file pointer\n" );
    return;
  }

  if( options == NULL ) *loptions = '\0';
  else { strncpy( loptions, options, 255 ); loptions[255] = '\0'; }

  cid = ( mol->m_nstrands > 1 ) ? 'A' : ' ';
	
  for( ta = 0, tr = 0, sp = mol->m_strands; sp; sp = sp->s_next ){
    /*
    **		fprintf( fp, "REMARK %s strand has %d residues\n",
    **			sp->s_name, sp->s_nresidues );
    **		for( r = 0; r < sp->s_nresidues; r++ )
    **			fprintf( fp, "REMARK   RES %3d %s %d atoms\n",
    **				r + 1, sp->s_residues[ r ]->r_resname,
    **				sp->s_residues[ r ]->r_natoms );
    */
    for( r = 0; r < sp->s_nresidues; r++, tr++ ){
      res = sp->s_residues[ r ];
      strcpy( rid, res->r_resid ? res->r_resid : "" );
      if( strstr( loptions, "-brook" ) )
	mk_brook_rname( rname, res );
      else
	strcpy( rname, res->r_resname );
      for( a = 0; a < res->r_natoms; a++ ){
	ta++;
	ap = &res->r_atoms[ a ];

	if( strstr( loptions, "-brook") )
	  mk_brook_aname( aname, ap->a_atomname, rname );
	else
	  strcpy( aname, ap->a_atomname );

	if( !strcmp( rid, "" ) || strstr( loptions, "-tr" ) ){
	  /* id string is empty or "-tr" is set: use residue numbers & chain id on output */
				/* amber numbers are total number of residues	*/
				/* without resetting at chain ids.  Ordinary	*/
				/* rn's begin at 1 for each chain		*/
	  rn = strstr( loptions, "-tr" ) ? tr + 1 : r + 1;
	  if( ta < 100000 ){
	    fprintf( fp,
		     "ATOM  %5d %-4s %-3s %c%4d    %8.3f%8.3f%8.3f",
		     ta, aname, rname,
		     strstr( loptions, "-nocid" ) ? ' ' : cid, rn,
		     ap->a_pos[ 0 ],
		     ap->a_pos[ 1 ],
		     ap->a_pos[ 2 ] );
	  }else{
	    fprintf( fp,
		     "ATOM  %05d %-4s %-3s %c%4d    %8.3f%8.3f%8.3f",
		     ta % 100000, aname, rname,
		     strstr( loptions, "-nocid" ) ? ' ' : cid, rn,
		     ap->a_pos[ 0 ],
		     ap->a_pos[ 1 ],
		     ap->a_pos[ 2 ] );
	  }
	} else {
				/*  use the resid field from the input pdb file  */
	  if( ta < 100000 ){
	    fprintf( fp,
		     "ATOM  %5d %-4s %3s %-6s   %8.3f%8.3f%8.3f",
		     ta, aname, rname, rid,
		     ap->a_pos[ 0 ],
		     ap->a_pos[ 1 ],
		     ap->a_pos[ 2 ] );
	  }else{
	    fprintf( fp,
		     "ATOM  %05d %-4s %3s %-6s   %8.3f%8.3f%8.3f",
		     ta % 100000, aname, rname, rid,
		     ap->a_pos[ 0 ],
		     ap->a_pos[ 1 ],
		     ap->a_pos[ 2 ] );
	  }
	}
	if( strstr( loptions, "-pqr" ) ) {
	  /* this format matches Beroza stuff:  */
	  fprintf( fp, "%10.5f%10.5f   ",
		   ap->a_charge, ap->a_radius );
	} else if( !strstr( loptions, "-nobocc" ) ) {
	  fprintf( fp, "%6.2f%6.2f           ",
		   ap->a_occ, ap->a_bfact );
	}

	if( strstr( loptions, "-brook" ) && 
	    !strstr( loptions, "-pqr" ) && 
	    ! strstr( loptions, "-nobocc" ) )
	  fprintf( fp, "%.1s  ", ap->a_atomname );

	fprintf( fp, "\n" );

      }  /* end loop over atoms in this residue  */

    }  /* end loop over residues in this strand  */

    fprintf( fp, "TER\n" );
    if( mol->m_nstrands > 1 ) cid++;

  }  /* end loop over strands  */
}

static	void	mk_brook_rname( char rname[], RESIDUE_T *res )
{

  strcpy( rname, res->r_resname );

  if( !strcmp( rname, "GUA" ) ) strcpy( rname, "  G" );
  if( !strcmp( rname, "ADE" ) ) strcpy( rname, "  A" );
  if( !strcmp( rname, "THY" ) ) strcpy( rname, "  T" );
  if( !strcmp( rname, "CYT" ) ) strcpy( rname, "  C" );
  if( !strcmp( rname, "URA" ) ) strcpy( rname, "  U" );

  if( !strcmp( rname, "DG" ) )  strcpy( rname, "  G" );
  if( !strcmp( rname, "DA" ) )  strcpy( rname, "  A" );
  if( !strcmp( rname, "DT" ) )  strcpy( rname, "  T" );
  if( !strcmp( rname, "DC" ) )  strcpy( rname, "  C" );

  if( !strcmp( rname, "RG" ) )  strcpy( rname, "  G" );
  if( !strcmp( rname, "RA" ) )  strcpy( rname, "  A" );
  if( !strcmp( rname, "RU" ) )  strcpy( rname, "  U" );
  if( !strcmp( rname, "RC" ) )  strcpy( rname, "  C" );

  if( !strcmp( rname, "DG3" ) ) strcpy( rname, "  G" );
  if( !strcmp( rname, "DA3" ) ) strcpy( rname, "  A" );
  if( !strcmp( rname, "DT3" ) ) strcpy( rname, "  T" );
  if( !strcmp( rname, "DC3" ) ) strcpy( rname, "  C" );

  if( !strcmp( rname, "RG3" ) ) strcpy( rname, "  G" );
  if( !strcmp( rname, "RA3" ) ) strcpy( rname, "  A" );
  if( !strcmp( rname, "RU3" ) ) strcpy( rname, "  U" );
  if( !strcmp( rname, "RC3" ) ) strcpy( rname, "  C" );

  if( !strcmp( rname, "DG5" ) ) strcpy( rname, "  G" );
  if( !strcmp( rname, "DA5" ) ) strcpy( rname, "  A" );
  if( !strcmp( rname, "DT5" ) ) strcpy( rname, "  T" );
  if( !strcmp( rname, "DC5" ) ) strcpy( rname, "  C" );

  if( !strcmp( rname, "RG5" ) ) strcpy( rname, "  G" );
  if( !strcmp( rname, "RA5" ) ) strcpy( rname, "  A" );
  if( !strcmp( rname, "RU5" ) ) strcpy( rname, "  U" );
  if( !strcmp( rname, "RC5" ) ) strcpy( rname, "  C" );

  if( !strcmp( rname, "HID" ) ) strcpy( rname, "HIS" );
  if( !strcmp( rname, "HID" ) ) strcpy( rname, "HIS" );
  if( !strcmp( rname, "HIP" ) ) strcpy( rname, "HIS" );
  if( !strcmp( rname, "CYX" ) ) strcpy( rname, "CYS" );
  if( !strcmp( rname, "ASH" ) ) strcpy( rname, "ASP" );
  if( !strcmp( rname, "GLH" ) ) strcpy( rname, "GLU" );

}

static	void	mk_brook_aname( char aname[], char name[], char rname[] )
{

  /* Following works for single-character element names, like H, C, N, O, S, P */


  aname[0] = ' ';     /* Brookhaven names start with space ....  */
  /*  ... except when a four-character name is "folded": */
  if( strlen( name ) >= 4 && name[3] != ' ' ) aname[0] = name[3];
  aname[1] = name[0];
  aname[2] = name[1];
  aname[3] = name[2];
  if( aname[3] == '\'' ) aname[3] = '*';  /* for nucleic acids */
  aname[4] = '\0';

  /*  Now fold certain numbers back to the first space:  */
  if( !strncmp( aname, " H", 2 ) &&
      ( aname[3] == '1' || aname[3] == '2' || aname[3] == '3' ) &&
      ( !strncmp( aname, " HB", 3 ) ||
	( rname != "PHE" && rname != "TYR" && rname != "TRP"
	  && strncmp( rname, "HI", 2 ) ) ) ){
    aname[0] = aname[3];
    aname[3] = ' ';
  }

  /*   We should now figure out and process two-character element names, but
       will not do this now.....    */

}

/*   Write out the molecule information in an "mmCIF-like" format; this
     should come close to being correct, but probably will not be acceptable
     to all mmCIF parsers.  For now, it should just be considered as a 
     prototype.  In particular, no changes are made to atom or residue
     names:  whatever is in the molecule will be used.                   */

static	void	fputcif( FILE *fp, char *blockId, MOLECULE_T *mol )
{
  int	r, tr, a, ta, strandnum;
  char	cid;
  STRAND_T	*sp;
  RESIDUE_T	*res;
  ATOM_T		*ap;

  assert( mol );
  assert( fp );

  cid = ( mol->m_nstrands > 1 ) ? 'A' : '.';
	
  /* simple mmCIF header: */

  fprintf( fp, "data_%s\n\n", blockId );
  fprintf( fp, "###########\n## ENTRY ##\n###########\n\n" );
  fprintf( fp, "_entry.id       %s\n\n", blockId );
  fprintf( fp, "###############\n## ATOM_SITE ##\n###############\n\n" );
  fprintf( fp, "loop_\n_atom_site.id\n_atom_site.label_atom_id\n" );
  fprintf( fp, "_atom_site.label_comp_id\n_atom_site.label_asym_id\n" );
  fprintf( fp, "_atom_site.auth_seq_id\n_atom_site.cartn_x\n" );
  fprintf( fp, "_atom_site.cartn_y\n_atom_site.cartn_z\n" );
  fprintf( fp, "_atom_site.label_entity_id\n_atom_site.label_seq_id\n" );

  strandnum = 0;
  for( ta = 0, tr = 0, sp = mol->m_strands; sp; sp = sp->s_next ){
    strandnum++;
    for( r = 0; r < sp->s_nresidues; r++, tr++ ){
      res = sp->s_residues[ r ];
      for( a = 0; a < res->r_natoms; a++ ){
	ta++;
	ap = &res->r_atoms[ a ];


				/* following examples from NDB:
				** auth_seq_id is the "absolute" residue number;
				** label_seq_id starts over from 1 at each new "strand"  
				*/

	fprintf( fp, "%5d %-4s %3s %c %4d %8.3f %8.3f %8.3f %3d %4d\n",
		 ta, ap->a_atomname, res->r_resname, cid, tr+1, 
		 ap->a_pos[ 0 ], ap->a_pos[ 1 ], ap->a_pos[ 2 ],
		 strandnum, r+1 );
      }
    }
    if( mol->m_nstrands > 1 )
      cid++;
  }
}