/************************************************************************\
*                                                                        *
*                                NASAWASH                                *
*             NASA-Format Keplerian Element Set File Cleanup             *
*                                                                        *
* Copyright 1995 Paul Williamson, KB5MU.  All Rights Reserved.           *
* You may use this program freely for non-commercial purposes.  This     *
* program may be distributed freely, provided that it is distributed     *
* without modification and with all the associated documentation files.  *
*                                                                        *
* NASAWASH extracts valid NASA format 2-line Keplerian element sets      *
* from a text file, which may contain other text of various kinds.  It   *
* is intended to remove the miscellaneous text commonly added by         *
* network e-mail and bulletin transmission.  It can remove lines of      *
* text before, after, or between element sets, but not between the       *
* lines of a single element set.  It can remove text on the same lines   *
* as the element sets, before or after the element sets, provided that   *
* the three lines of the element set all begin in the same column.       *
*                                                                        *
* NASAWASH insists that the element sets contain valid checksums, but    *
* it will accept either of the two common checksum algorithms in use.    *
* The element sets output by NASAWASH contain no "+" signs, so they      *
* will be accepted by either checksum algorithm.  Except for the format  *
* and the checksum, NASAWASH does not attempt to interpret the element   *
* set.                                                                   *
*                                                                        *
* NASAWASH doesn't care what end-of-line terminators your input file     *
* uses, so you don't need to worry about converting between Unix (LF),   *
* MS-DOS (CR+LF), or Macintosh (CR) formats.  The output file always     *
* uses the standard convention for text files on your system.            *
*                                                                        *
* $Header: g:/c/nasawash/rcs/nasawash.c 1.1 95/05/21 16:41:04 ptw Exp $                                                               *
*                                                                        *
\************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Maximum number of characters buffered per text line.  Longer input
   lines are OK (get_line() returns them in MAXLINE-sized pieces), but if
   a valid element set spans such a boundary, it won't be found. */
#define  MAXLINE     1024

/* Status values returned by checksum_line() and confirm_elset() */
#define  ELSET_BAD   0        /* Candidate failed validation */
#define  ELSET_OK    1        /* Candidate passed validation */

/* Line buffer type: one line of input text */
typedef  struct
   {
   int   len;                 /* Number of valid characters in text[] */
   char  text[MAXLINE];       /* The characters; NOT NUL-terminated */
   } linebuf_type;

/* Rolling buffer of three consecutive text lines.  When the window holds
   an element set, lines[0] is the satellite name line and lines[1] and
   lines[2] are data lines 1 and 2. */
linebuf_type   lines[3];

/********************************************************************\
*                                                                    *
* checksum_line                                                      *
*                                                                    *
* Verifies the NASA checksum of one element set data line.  Both     *
* checksum conventions are accepted: the "old" one, where "+"        *
* counts as 0, and the "new" one, where "+" counts as 2.             *
*                                                                    *
* buf    points to the 68 characters covered by the checksum.        *
* check  is the checksum character found in the line.                *
*                                                                    *
* Each character is converted to unsigned char before being          *
* classified, because passing a negative char value to isdigit()     *
* is undefined behavior.                                             *
*                                                                    *
* Returns  ELSET_OK  if the checksum matches either algorithm,       *
*          ELSET_BAD if not.                                         *
*                                                                    *
\********************************************************************/

char checksum_line(char *buf, char check)
{
int            col;              /* Column counter */
int            sum = 0;          /* Old-algorithm checksum computed */
int            plus_total = 0;   /* Delta due to "+" signs in new algorithm */
int            expected;         /* Numeric value of the checksum character */
unsigned char  chr;              /* Current character being added to checksum */

for (col = 0; col < 68; col++)            /* Scan through the line */
   {
   chr = (unsigned char) buf[col];        /* Keep <ctype.h> argument in range */
   if (isdigit(chr))                      /* Digits count as their value */
      sum += chr - '0';
   else if (chr == '-')                   /* "-" counts as 1 */
      sum += 1;
   else if (chr == '+')                   /* "+" may count as 2 */
      plus_total += 2;
   }

expected = check - '0';

if (expected == (sum % 10))                  /* Try the old algorithm check */
   return ELSET_OK;
if (expected == ((sum + plus_total) % 10))   /* New style */
   return ELSET_OK;

return ELSET_BAD;
}

/********************************************************************\
*                                                                    *
* confirm_elset                                                      *
*                                                                    *
* Validates the candidate element set that starts at column          *
* col_offset of the lines array by matching it, character by         *
* character, against a template.  The template expresses the         *
* minimum criteria for a 2-line element set, including a valid       *
* checksum on each of the two data lines.                            *
*                                                                    *
* The caller must make sure that lines[1] and lines[2] hold at       *
* least 69 characters starting at col_offset, and that lines[0]      *
* holds at least one.                                                *
*                                                                    *
* Input bytes are treated as unsigned char so that the <ctype.h>     *
* tests are well defined for bytes above 127, and a NUL byte is      *
* never accepted as element set data.                                *
*                                                                    *
* Returns ELSET_OK if the candidate is valid, ELSET_BAD if not.      *
*                                                                    *
\********************************************************************/

int confirm_elset(int col_offset)
{
      /* One template per line; 'E' marks the end of each template */
static const char  *template[3] =
   {
   "*E",
   "1 xxxxdl xxxxxxxx ddxxd.dddddddd x.dxxxxxxx  xxxxxxx xxxxxxxxxxxxxxxCE",
   "2 ddddd xxd.dxxx xxd.dxxx dxxxxxx xxd.dxxx xxd.dxxx xd.dxxxxxxxxxxxxCE"
   };

int            line;
int            tcol;
char           template_char;
unsigned char  lines_char;

for (line = 0; line < 3; line++)
   {
   for (tcol=0; template[line][tcol] != 'E'; tcol++)
      {
      template_char = template[line][tcol];
      lines_char = (unsigned char) lines[line].text[tcol+col_offset];

      switch (template_char)
         {
         case ' ':      /* These template characters must match literally */
         case '1':
         case '2':
         case '.':
            if (lines_char != (unsigned char) template_char)
               return ELSET_BAD;
            break;

         case 'd':      /* This template character must match a digit */
            if (!isdigit(lines_char))
               return ELSET_BAD;
            break;

         case 'l':      /* This template character must match a letter or space */
            if (!isalpha(lines_char) && (lines_char != ' '))
               return ELSET_BAD;
            break;

         case '*':      /* This template character must match a printable */
            if (!isprint(lines_char))
               return ELSET_BAD;
            break;

         case 'x':      /* This template character matches almost anything.
                           NUL is excluded explicitly: strchr() would find
                           it at the terminator of the search string. */
            if (!isalnum(lines_char) &&
                (lines_char == '\0' || strchr("+-. ", lines_char) == NULL))
               return ELSET_BAD;
            break;

         case 'C':      /* This template character must match a digit, AND
                           be the checksum of the previous 68 positions */
            if (!isdigit(lines_char))
               return ELSET_BAD;

            if (checksum_line(&(lines[line].text[col_offset]),
                              (char) lines_char) != ELSET_OK)
               return ELSET_BAD;
            break;

         default:       /* Only reachable if the template itself is wrong.
                           Report on stderr; stdout may be the data output. */
            fprintf(stderr, "Fatal error: bad case.\n");
            exit(1);
         }/* switch on template character */
      }/* for column */
   }/* for line */

return ELSET_OK;
}

/********************************************************************\
*                                                                    *
* update_checksum                                                    *
*                                                                    *
* Rewrites the checksum of one element set data line in place.       *
* Any "+" signs in the first 68 columns are replaced with spaces     *
* so that the result does not depend on which checksum algorithm     *
* a reading program applies to "+" signs.                            *
*                                                                    *
* buf must have room for at least 70 characters: columns 0-67 are    *
* summed, column 68 receives the checksum digit, and column 69       *
* receives the string terminator.                                    *
*                                                                    *
* Characters are classified as unsigned char, since passing a        *
* negative char value to isdigit() is undefined behavior.            *
*                                                                    *
\********************************************************************/

void update_checksum(char *buf)
{
int            col;
int            sum = 0;
unsigned char  chr;

buf[69] = '\0';                           /* Terminate after the checksum */
for (col = 0; col < 68; col++)            /* Scan through the line */
   {
   chr = (unsigned char) buf[col];        /* Keep <ctype.h> argument in range */
   if (isdigit(chr))                      /* Digits count as their value */
      sum += chr - '0';
   else if (chr == '-')                   /* "-" counts as 1 */
      sum += 1;
   else if (chr == '+')                   /* "+"'s replaced with spaces */
      buf[col] = ' ';
   }
buf[68] = (char) ((sum % 10) + '0');      /* Put in new checksum */
}

/********************************************************************\
*                                                                    *
* output_elset                                                       *
*                                                                    *
* Writes the element set that starts at column col of the lines      *
* array to outfile as three newline-terminated lines.                *
*                                                                    *
* The name line is cut off at the end of the input line, at the      *
* first tab, or at the first run of two spaces, and is never         *
* longer than 69 characters.  Each data line is copied (69           *
* characters) and given a fresh checksum by update_checksum().       *
*                                                                    *
\********************************************************************/

void output_elset(int col, FILE *outfile)
{
char  out[70];                            /* One output line plus terminator */
char  *cut;                               /* Where to truncate the name */
int   name_len = lines[0].len - col;      /* Name characters available */
int   row;

      /* Name line: copy, then trim at end of input, tab or double space */
strncpy(out, (const char *)&(lines[0].text[col]), 69);
out[69] = '\0';
if (name_len < 69)
   out[name_len] = '\0';

cut = strchr(out, '\t');
if (cut != NULL)
   *cut = '\0';

cut = strstr(out, "  ");
if (cut != NULL)
   *cut = '\0';

fputs(out, outfile);
fputc('\n', outfile);

      /* Data lines 1 and 2: copy, refresh the checksum, write */
for (row = 1; row <= 2; row++)
   {
   strncpy(out, (const char *)&(lines[row].text[col]), 69);
   update_checksum(out);                  /* Also terminates out[] */
   fputs(out, outfile);
   fputc('\n', outfile);
   }
}

/********************************************************************\
*                                                                    *
* get_line                                                           *
*                                                                    *
* Reads the next line of text from infile into a linebuf.  Any       *
* combination of CR and LF is accepted as a line terminator;         *
* empty lines (runs of CR and/or LF) are skipped silently.  The      *
* terminator is not stored, and the text is not NUL-terminated.      *
*                                                                    *
* If MAXLINE characters are stored before a terminator is seen,      *
* a full line is returned as if a terminator had been seen.  The     *
* character that did not fit is pushed back with ungetc(), so it     *
* becomes the first character of the next line.                      *
*                                                                    *
* A final line that ends at EOF without a terminator is returned     *
* complete, including its last character.                            *
*                                                                    *
* Returns the number of characters placed in the buffer, which       *
* is also stored in line->len.  Zero means EOF or a read error.      *
*                                                                    *
\********************************************************************/

int get_line(linebuf_type *line, FILE *infile)
{
int   count = 0;           /* Index of next position in line buffer */
int   chr;                 /* Character being examined; int so that EOF
                              is distinct from every data byte */

do {                       /* Eat up any leftover end-of-line characters */
   chr = getc(infile);
   }
   while (chr == '\n' || chr == '\r');
                           /* (Let first chr fall through to the main loop) */

                           /* Copy all non-EOL characters into line buf.
                              getc() returns EOF for end of file and for
                              read errors alike. */
while (chr != EOF && chr != '\n' && chr != '\r')
   {
   if (count == MAXLINE)   /* Buffer full: keep chr for the next call */
      {
      ungetc(chr, infile);
      break;
      }

   line->text[count++] = (char) chr;
   chr = getc(infile);
   }

line->len = count;

return count;
}

/**********************************************************\
*                                                          *
* roll_lines                                               *
*                                                          *
* This function scrolls the window of three lines up one,  *
* by copying the last two lines into the first two lines   *
* and zeroing out the last line.                           *
*                                                          *
\**********************************************************/

void roll_lines(void)
{
memcpy(lines[0].text, lines[1].text, lines[1].len);
lines[0].len = lines[1].len;

memcpy(lines[1].text, lines[2].text, lines[2].len);
lines[1].len = lines[2].len;

lines[2].len = 0;
}


/********************************************************************\
*                                                                    *
* find_elset                                                         *
*                                                                    *
* Searches the three lines in the lines array for an element         *
* set.  Every column where line 1 has a "1" and line 2 has a "2"     *
* is treated as a candidate and fully checked by confirm_elset().    *
* The first candidate that passes is written with output_elset().    *
*                                                                    *
* Returns the number of element sets found: 0 or 1.                  *
*                                                                    *
\********************************************************************/

int find_elset(FILE *outfile)
{
int   limit;               /* Last column an element set could start in */
int   col;

      /* Lines 1 and 2 each need 69 characters from the starting column
         for the data, and line 0 needs at least one character there
         for the satellite's name.  The tightest of the three wins. */
limit = lines[1].len - 69;
if (lines[2].len - 69 < limit)
   limit = lines[2].len - 69;
if (lines[0].len - 1 < limit)
   limit = lines[0].len - 1;

      /* The line numbers are a cheap first test; only columns that
         pass it are handed to the full validation. */
for (col = 0; col <= limit; col++)
   {
   if (lines[1].text[col] != '1')
      continue;
   if (lines[2].text[col] != '2')
      continue;
   if (confirm_elset(col) != ELSET_OK)
      continue;

   output_elset(col, outfile);            /* Found one!  Output it. */
   return 1;
   }

return 0;                                 /* No element sets found */
}
      
/********************************************************************\
*                                                                    *
* usage                                                              *
*                                                                    *
* Prints the program banner, which includes the build date and       *
* time, followed by the command-line help text, to stderr.           *
*                                                                    *
\********************************************************************/

void usage(void)
{
static const char *const help[] =
   {
   "Copyright 1995 Paul Williamson, KB5MU.  All rights reserved.\n",
   "You may use this program freely for non-commercial purposes.\n\n",
   "  usage:  nasawash [inputfile [outputfile]]\n",
   "     If no inputfile is specified, reads from stdin.\n",
   "     If no outputfile is specified, outputs to stdout.\n"
   };
size_t   i;

fprintf(stderr, "NASAWASH 0.50 %s %s\n", __DATE__, __TIME__);

for (i = 0; i < sizeof help / sizeof help[0]; i++)
   fputs(help[i], stderr);
}

/********************************************************************\
*                                                                    *
* main                                                               *
*                                                                    *
* Opens the input and output streams named on the command line       *
* (stdin and stdout by default), copies every valid element set      *
* found in the input to the output, and reports the number found     *
* on stderr unless running as a pure stdin-to-stdout filter.         *
*                                                                    *
* Returns 0 on success.  Exits with status 1 on a usage error,       *
* a file that cannot be opened, or a read, write or close error.     *
*                                                                    *
\********************************************************************/

int main(int argc, char **argv)
{
FILE  *infile = stdin;     /* Defaults apply when no file is named */
FILE  *outfile = stdout;
int   filter_mode;         /* Nonzero when no arguments were given */
int   elset_count = 0;

if (argc > 3)
   {
   usage();
   exit(1);
   }

filter_mode = (argc < 2);

if (argc >= 2)
   {
      /* Open input file in binary mode so we can cope correctly with
         any kind of binary gibberish that happens to be in the file,
         including Ctrl-Z. */
   infile = fopen(argv[1], "rb");
   if (infile == NULL)
      {
      fprintf(stderr, "Could not open file %s: ", argv[1]);
      perror(NULL);
      usage();
      exit(1);
      }
   }

if (argc == 3)
   {
   outfile = fopen(argv[2], "w");
   if (outfile == NULL)
      {
      fprintf(stderr, "Could not open output file %s: ", argv[2]);
      perror(NULL);
      usage();
      exit(1);
      }
   }

setvbuf(infile,  NULL, _IOFBF, 8192);     /* Use some buffering for file I/O */
setvbuf(outfile, NULL, _IOFBF, 8192);

      /* Prime the first two lines of the window, then slide it down the
         file one line at a time. */
if (  get_line(&(lines[0]), infile) != 0
   && get_line(&(lines[1]), infile) != 0 )
   {
   while (get_line(&(lines[2]), infile) != 0)
      {
      if (find_elset(outfile) == 0)
         {
         roll_lines();                    /* Nothing here; move down a line */
         continue;
         }

         /* All three lines were used up by the element set, so refill
            the first two before reading the third again. */
      ++elset_count;
      if (  get_line(&(lines[0]), infile) == 0
         || get_line(&(lines[1]), infile) == 0 )
         break;
      }
   }

if (!filter_mode)
   {
   switch (elset_count)
      {
      case 0:
         fprintf(stderr, "No element sets found.\n");
         break;

      case 1:
         fprintf(stderr, "Found 1 element set.\n");
         break;

      default:
         fprintf(stderr, "Found %d element sets.\n", elset_count);
         break;
      }
   }

      /* Check and close the streams in order; the first failure ends
         the program. */
if (ferror(infile))
   {
   perror("Error reading input file");
   exit(1);
   }

if (fclose(infile))
   {
   perror("Error closing input file");
   exit(1);
   }

if (ferror(outfile))
   {
   perror("Error writing output file");
   exit(1);
   }

if (fclose(outfile))
   {
   perror("Error closing output file");
   exit(1);
   }

return 0;
}