/*
 * sshread.c
 *
 * routines of input, and formatting according to the styles
 * Copyright (c) 1988, 89, 90, 91, 92, 93 Miguel Santana
 * Copyright (c) 1995, 96, 97, 98 Akim Demaille, Miguel Santana
 * $Id: sshread.c,v 1.63 1998/03/04 16:20:59 demaille Exp $
 */

/*
 * This file is part of a2ps.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/*
 * $Id: sshread.c,v 1.63 1998/03/04 16:20:59 demaille Exp $
 */

#include "a2ps.h"
#include "sshread.h"
#include "ssheet.h"
#include "routines.h"
#include "buffer.h"
#include "jobs.h"
#include "fjobs.h"
#include "psgen.h"
#include "assert.h"
#include "quotearg.h"

/*
 * The job being processed.  It is only declared here; the definition
 * lives in another file (main.c, according to the original note).
 */
extern struct a2ps_job *job;

/*
 * Shortcut to call re_match with REGEX upon BUFFER, starting at the
 * current position of the buffer, and store the registers in TOKEN.
 * Expands to the value returned by re_match.
 *
 * Every argument is parenthesized in the expansion, so that any
 * pointer expression (not just a plain identifier) can be passed.
 */
#define buffer_match(buffer,regex,token)			\
   re_match ((regex),						\
	     (char *) (buffer)->value, (buffer)->len,		\
	     (buffer)->curr,					\
	     (token)->registers)

/*
 * Structure in which is stored the result of a parsing
 */
struct token
  {
    /* Match registers.  Entry 0 of start/end delimits, as offsets in
     * the buffer, the text that was recognized. */
    struct re_registers *registers;
    /* Right-hand side: an array whose items are used as
     * struct faced_string (face, optional string, register reference). */
    struct darray *rhs;
  };

/*
 * Accessors to the _I_-th item of the right-hand side of the current
 * token.  They expand to expressions on a variable named `token',
 * which must be in scope where they are used.
 */

/* The item itself, seen as a struct faced_string */
#define token_dest(_i_)	\
    ((struct faced_string *) token->rhs->content[_i_])

/* The face member of that item */
#define token_dest_fface(_i_)	\
    (token_dest(_i_)->face)

/* fface_get_face applied to the face member of that item */
#define token_dest_face(_i_)	\
    (fface_get_face(token_dest_fface(_i_)))

/* fface_get_flags applied to the face member of that item */
#define token_dest_fflags(_i_)	\
    (fface_get_flags(token_dest_fface(_i_)))

static inline struct token *
token_new (void)
{
  struct token *res = XMALLOC (struct token, 1);
  res->registers = XMALLOC (struct re_registers, 1);
  res->registers->start = XMALLOC (regoff_t, 30);
  res->registers->end = XMALLOC (regoff_t, 30);
  return res;
}

/*
 * Build the right-hand side used for ordinary text: the result of
 * rhs_new_single called with NULL, 0 and Plain_fface (presumably: no
 * fixed string, register 0, plain face -- confirm against
 * rhs_new_single).
 */
static inline struct darray *
rhs_plain_new (void)
{
  struct darray *plain = rhs_new_single (NULL, 0, Plain_fface);

  return plain;
}

static inline void
token_free (struct token *token)
{
  free (token);
}

/* Where the token and its attributes are stored.
 * Created on the first call to ssh_print_postscript. */
static struct token *token = NULL;
/* Record in register 0 of the token that the match starts at offset
 * _START_ and is _LEN_ characters long. */
#define token_set_registers(_start_, _len_)		\
 do {							\
  token->registers->start [0] = _start_;		\
  token->registers->end [0] = _start_ + _len_;		\
 } while (0)

/* Start offset of the register referenced by the _I_-th rhs item */
#define token_start(_i_)		\
  token->registers->start [token_dest(_i_)->reg_ref]

/* End offset of the register referenced by the _I_-th rhs item */
#define token_end(_i_)		\
  token->registers->end [token_dest(_i_)->reg_ref]

/* Right-hand side shared by all the tokens which are not produced by
 * a rule.  The face of its first item is overwritten as needed. */
static struct darray *plain_rhs = NULL;

/****************************************************************/
/*              pretty printing service routines                */
/****************************************************************/
/*
 * Eat characters as long as they are in the 2nd alphabet
 * and we are still inside the buffer.  At least one character is
 * always eaten.
 *
 * On return the token uses the plain right-hand side with the plain
 * face, and its register 0 covers the characters that were eaten.
 */
static inline void
match_word (buffer_t * buffer, struct style_sheet *sheet)
{
  int start = buffer->curr;

  /* Make sure something is left *before* looking at the character,
   * so that nothing beyond the end of the buffer is ever read. */
  do
    buffer->curr++;
  while (!buffer_is_empty (buffer)
	 && sheet->alpha2[*(buffer->content + buffer->curr)]);

  token->rhs = plain_rhs;
  token_dest (0)->face = Plain_fface;
  token->registers->start[0] = start;
  token->registers->end[0] = buffer->curr;
}

/****************************************************************/
/*                      lexical analysis routines               */
/****************************************************************/
/* The _I_-th regexp rule of WORDS, seen as a struct rule.
 * A variable named `words' must be in scope where it is used. */
#define word_regexp(_i_)	\
   (((struct rule *) words->regexps->content[_i_]))
/*
 * Try to recognize a keyword of WORDS at the current point of BUFFER.
 * A plain word is accepted only if the character which follows it is
 * not in ALPHABET.  On success, fill TOKEN, move the current point
 * past the match and return 1.  Return 0 if nothing matches.
 */
static inline int
match_keyword (buffer_t * buffer,
	       struct words *words,
	       uchar * alphabet)
{
  uchar *here = buffer->value + buffer->curr;
  struct rule **candidate;
  int idx;

  /* The plain words come first: walk from max[] down to min[] of the
   * first character. */
  if (words->min[*here])
    {
      for (candidate = words->max[*here];
	   candidate >= words->min[*here];
	   candidate--)
	{
	  size_t word_len;

	  if (!ustrprefix ((*candidate)->word, here))
	    continue;

	  /* The word must not be followed by a letter of the alphabet */
	  word_len = ustrlen ((*candidate)->word);
	  if (alphabet[here[word_len]])
	    continue;

	  token->rhs = (*candidate)->rhs;
	  token_set_registers (buffer->curr, word_len);
	  buffer->curr += word_len;
	  return 1;
	}
    }

  /* Then the regexps, from the last one to the first one, so that
   * the _last_ definition wins. */
  idx = (int) words->regexps->len;
  while (--idx >= 0)
    {
      int matched = buffer_match (buffer, word_regexp (idx)->regex, token);

      if (matched == -1)
	continue;		/* No match: try the next regexp */

      if (matched == -2)
	{
	  /* Internal error: report it, and go on with the next one */
	  fprintf (stderr, "An error occured while matching\n");
	  continue;
	}

      token->rhs = word_regexp (idx)->rhs;
      buffer->curr += matched;
      return 1;
    }

  /* Nothing was recognized */
  return 0;
}

/*
 * Try to recognize an operator of WORDS at the current point of
 * BUFFER.  Contrary to keywords, nothing is required from the
 * character which follows a plain word.  On success, fill TOKEN, move
 * the current point past the match and return 1.  Return 0 if nothing
 * matches.
 */
static inline int
match_operator (buffer_t * buffer,
		struct words *words)
{
  uchar *here = buffer->value + buffer->curr;
  struct rule **candidate;
  int idx;

  /* The plain words come first: walk from max[] down to min[] of the
   * first character. */
  if (words->min[*here])
    {
      for (candidate = words->max[*here];
	   candidate >= words->min[*here];
	   candidate--)
	{
	  size_t word_len;

	  if (!ustrprefix ((*candidate)->word, here))
	    continue;

	  word_len = ustrlen ((*candidate)->word);
	  token->rhs = (*candidate)->rhs;
	  token_set_registers (buffer->curr, word_len);
	  buffer->curr += word_len;
	  return 1;
	}
    }

  /* Then the regexps, from the last one to the first one, so that
   * the _last_ definition wins. */
  idx = (int) words->regexps->len;
  while (--idx >= 0)
    {
      int matched = buffer_match (buffer, word_regexp (idx)->regex, token);

      if (matched == -1)
	continue;		/* No match: try the next regexp */

      if (matched == -2)
	{
	  /* Internal error: report it, and go on with the next one */
	  fprintf (stderr, "An error occured while matching\n");
	  continue;
	}

      token->rhs = word_regexp (idx)->rhs;
      buffer->curr += matched;
      return 1;
    }

  /* Nothing was recognized */
  return 0;
}

#ifdef SEQ
#undef SEQ
#endif
/*
 * The _I_-th sequence of SHEET, seen as a struct sequence.
 * A variable named `sheet' must be in scope where it is used.  The
 * index really is the argument of the macro: it no longer depends on
 * the caller owning a variable which happens to be named `i'.
 */
#define SEQ(_i_) 	\
   ((struct sequence *) sheet->sequences->content [(_i_)])

/*
 * If the text at the current point of BUFFER opens one of the
 * sequences of SHEET, fill TOKEN with the opening part, move the
 * current point past it, and return that sequence.
 * Otherwise return NULL.
 */
static inline struct sequence *
match_sequence (buffer_t * buffer, struct style_sheet *sheet)
{
  int i;
  int res;
  uchar *string = buffer->value + buffer->curr;

  /* In reversed order (in order to take the _last_ definition) */
  for (i = (int) sheet->sequences->len - 1; i >= 0; i--)
    {
      if (SEQ (i)->open->regex)
	{
	  /* The opening part is a regexp pattern */
	  res = buffer_match (buffer, SEQ (i)->open->regex, token);
	  switch (res)
	    {
	    case -2:
	      /* Internal error: report it, then try the next sequence */
	      fprintf (stderr, "An error occured while matching\n");
	      break;
	    case -1:
	      /* No match */
	      continue;
	    default:
	      /* RES characters were matched */
	      token->rhs = SEQ (i)->open->rhs;
	      buffer->curr += res;
	      return SEQ (i);
	    }
	}
      else
	{
	  /* The opening part is a string */
	  if (ustrprefix (SEQ (i)->open->word, string))
	    {
	      token_set_registers (buffer->curr, ustrlen (SEQ (i)->open->word));
	      token->rhs = SEQ (i)->open->rhs;
	      buffer->curr += ustrlen (SEQ (i)->open->word);
	      return SEQ (i);
	    }
	}
    }
  return NULL;
}

/*
 * Read the next token of BUFFER according to SHEET, and store it in
 * the file-level `token'.
 * Return 1 when a token was read, 0 if nothing is left (i.e., the
 * buffer was refilled and its length is 0).
 *
 * Two pieces of information are kept from one call to the next one:
 * the sequence which is currently open (if any), and whether the
 * plain face must be restored before going on.
 */
static inline int
ssh_get_token (buffer_t * buffer, struct style_sheet *sheet)
{
  /* True when the previous token was a keyword, an operator, or the
   * closing part of a sequence */
  static int return_to_plain = false;
  /* NULL if not in a sequence currently */
  static struct sequence *sequence = NULL;

  if (buffer_is_empty (buffer))
    {
      buffer_get (buffer);

      /* We don't trust liba2ps for the line numbers, because
       * if a2ps skips some lines (e.g., --strip-level, or INVISIBLE),
       * liba2ps will number upon output lines, not input lines,
       * which is what is expected */
      (CURRENT_FILE (job))->lines = buffer->line;

      if (buffer->len == 0)
	{
	  /* end of file: reset values */
	  /* If this is a new file, it must not depend on the trailing
	   * parameters of the previous file */
	  sequence = NULL;
	  return_to_plain = false;
	  return 0;
	}
    }

  /* Restore the plain face on the shared plain right-hand side */
  if (return_to_plain)
    {
      return_to_plain = false;
      token->rhs = plain_rhs;
      token_dest (0)->face = Plain_fface;
    }

  if (sequence)
    {
      /* escape: not converted when in a sequence */
      if (match_operator (buffer, sequence->exceptions))
	return 1;
      /* end of sequence ? */
      if (match_operator (buffer, sequence->close))
	{
	  return_to_plain = true;
	  sequence = NULL;
	  return 1;
	}
      /* We are in a sequence not to be closed yet.
       * Advance of 1 char, which gets the face of the sequence */
      token->rhs = plain_rhs;
      token_dest (0)->face = sequence->face;
      token->registers->start[0] = buffer->curr++;
      token->registers->end[0] = buffer->curr;
      return 1;
    }
  else
    {				/* (not in sequence) */
      /* Does a sequence open here?  If so, remember it for the next
       * calls */
      if ((sequence = match_sequence (buffer, sheet)))
	return 1;
      else if (sheet->alpha1[buffer->content[buffer->curr]])
	{
	  /* we are in a word since this was a char belonging to the
	   * first alphabet */
	  if (match_keyword (buffer, sheet->keywords, sheet->alpha2)
	      || match_operator (buffer, sheet->operators))
	    {
	      return_to_plain = true;
	      return 1;
	    }
	  else
	    {
	      /* since some characters may be used inside an identifier
	       * (eg, x' = x in claire) but can also be used to open
	       * a special sequence (eg, 'x' in claire), then we must read
	       * the whole word, and print it.
	       */
	      match_word (buffer, sheet);
	      return 1;
	    }
	}
      else if (match_operator (buffer, sheet->operators))
	{
	  return_to_plain = true;
	  return 1;
	}
    }

  /* We did not recognize something special: the token is the current
   * character alone.  The face of the plain right-hand side is not
   * touched here. */
  token->rhs = plain_rhs;
  token->registers->start[0] = buffer->curr++;
  token->registers->end[0] = buffer->curr;
  return 1;
}

/*
 * Append to _TAG_ the text of the buffer which is delimited by the
 * register of the current right-hand side item.
 * Variables named `buffer' and `i' must be in scope where it is used.
 * The room left in _TAG_ is not checked.
 */
#define GRAB_TAG(_tag_)					\
  do {							\
    ustrncat (_tag_,					\
	      buffer->content + token_start (i),	\
	      token_end (i) - token_start (i));		\
  } while (0)

/*
 * ssh-Pretty print a file to postscript
 *
 * Read BUFFER token after token according to SHEET.  For each item of
 * the right-hand side of a token:
 *  - text whose face carries a tag flag is appended to the matching
 *    Job->tag1 .. Job->tag4;
 *  - text whose face carries the encoding flag is collected as the
 *    name of an encoding.  Once the name is complete, JOB is switched
 *    to that encoding, or an error is reported if it is unknown;
 *  - text whose face is not invisible is given to liba2ps.
 *
 * The file-level token and plain right-hand side are created by the
 * first call, and kept for the following ones.
 */
void
ssh_print_postscript (struct a2ps_job *Job,
		      buffer_t * buffer,
		      struct style_sheet *sheet)
{
  struct fface_s fface;
  /* To grab the encoding switching instruction */
  uchar bufenc[512];
  int grabbing_encoding = false;
  size_t i;

  *bufenc = '\0';
  fface = Plain_fface;

  /* I must do this because of the broken handling of the registers in
   * regex.  Until I find a way to ensure enough place in the
   * registers, without having to trust regex. */
  if (!token)
    {
      token = token_new ();
      plain_rhs = rhs_plain_new ();
    }

  while (ssh_get_token (buffer, sheet) != 0)
    {
      for (i = 0; i < token->rhs->len; i++)
	{
	  /* Is a new face ? */
	  if (!fface_squ (fface, token_dest_fface (i)))
	    {
	      /* Reset dynamic markers */
	      if (token_dest_fflags (i) & ff_Tag1)
		*Job->tag1 = '\0';
	      if (token_dest_fflags (i) & ff_Tag2)
		*Job->tag2 = '\0';
	      if (token_dest_fflags (i) & ff_Tag3)
		*Job->tag3 = '\0';
	      if (token_dest_fflags (i) & ff_Tag4)
		*Job->tag4 = '\0';
	      if (grabbing_encoding
		  && (!(token_dest_fflags (i) & ff_Encoding)))
		{
		  /* Grabbing of the encoding name is completed.
		   * Look it up in the very job which is going to be
		   * switched, i.e., the one we were given. */
		  struct encoding *newenc;
		  newenc = get_encoding_by_alias (Job, (char *) bufenc);
		  if (!newenc)
		    error (0, 0, _ ("unknown encoding `%s', ignored"),
			   quotearg ((char *) bufenc));
		  else
		    ps_switch_encoding (Job, newenc);
		  *bufenc = '\0';
		  grabbing_encoding = false;
		}
	      fface = token_dest (i)->face;
	    }

	  /* See if there are some information to grab.
	   * NOTE(review): the size of Job->tag1 .. Job->tag4 is not
	   * visible from here, so these appends are left unbounded --
	   * verify against the declaration of struct a2ps_job. */
	  if (token_dest (i)->string)
	    {
	      if (fface_get_flags (fface) & ff_Tag1)
		ustrcat (Job->tag1, token_dest (i)->string);
	      if (fface_get_flags (fface) & ff_Tag2)
		ustrcat (Job->tag2, token_dest (i)->string);
	      if (fface_get_flags (fface) & ff_Tag3)
		ustrcat (Job->tag3, token_dest (i)->string);
	      if (fface_get_flags (fface) & ff_Tag4)
		ustrcat (Job->tag4, token_dest (i)->string);
	      /* Grab the dynamic encodings */
	      if (fface_get_flags (fface) & ff_Encoding)
		{
		  /* Never write beyond the end of bufenc: a name which
		   * is too long is truncated (and will then be reported
		   * as unknown). */
		  size_t room = sizeof bufenc - 1 - ustrlen (bufenc);

		  grabbing_encoding = true;
		  ustrncat (bufenc, token_dest (i)->string, room);
		}
	    }
	  else
	    {
	      if (fface_get_flags (fface) & ff_Tag1)
		GRAB_TAG (Job->tag1);
	      if (fface_get_flags (fface) & ff_Tag2)
		GRAB_TAG (Job->tag2);
	      if (fface_get_flags (fface) & ff_Tag3)
		GRAB_TAG (Job->tag3);
	      if (fface_get_flags (fface) & ff_Tag4)
		GRAB_TAG (Job->tag4);
	      /* Grab the dynamic encodings */
	      if (fface_get_flags (fface) & ff_Encoding)
		{
		  /* Same as above: append no more than what fits */
		  size_t room = sizeof bufenc - 1 - ustrlen (bufenc);
		  size_t span = token_end (i) - token_start (i);

		  if (span > room)
		    span = room;
		  grabbing_encoding = true;
		  ustrncat (bufenc, buffer->content + token_start (i), span);
		}
	    }

	  /*
	   * If not invisible, give it to liba2ps for printing.
	   */
	  if (!(fface_get_flags (fface) & ff_Invisible))
	    {
	      if (token_dest (i)->string)
		ps_print_string (Job, token_dest (i)->string,
				 fface.face);
	      else
		ps_print_buffer (Job, buffer->content,
				 token_start (i), token_end (i),
				 fface.face);
	    }
	}
    }

  /* FIXME: Find a way to remove the token some day
     token_free (token);
   */
}