/* -*-C-*-

$Id: regex.h,v 1.7 1999/01/02 06:11:34 cph Exp $

Copyright (c) 1987-1999 Massachusetts Institute of Technology

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/* NOTE: This program was created by translation from the regular
expression code of GNU Emacs; it was translated from the original C to
68000 assembly language (in 1986), and then translated back from 68000
assembly language to C (in 1987).  Users should be aware that the GNU
GENERAL PUBLIC LICENSE may apply to this code.  A copy of that license
should have been included along with this file. */

/* Structure to represent a buffer of text to match against.
   This contains the information that an editor buffer would have
   to supply for the matching process to be executed.

   `translation' is an array of MAX_ASCII characters which is used to
   map each character before matching.  Both the pattern and the match
   text are mapped.  This is normally used to implement case
   insensitive searches.

   `syntax_table' describes the syntax of the match text.  See the
   syntax table primitives for more information.

   `text' points to the beginning of the match text.  It is used only
   for translating match text pointers into indices.

   `text_start' and `text_end' delimit the match text.  They define
   the buffer-start and buffer-end for those matching commands that
   refer to them.  Also, all matching must take place within these
   limits.

   `gap_start' and `gap_end' delimit a gap in the match text.  Editor
   buffers normally have such a gap.  For applications without a gap,
   it is recommended that these be set to the same value as
   `text_end'.

   Both `text_start' and `gap_start' are inclusive indices, while
   `text_end' and `gap_end' are exclusive.

   The following conditions must be true:

   (text <= text_start)
   (text_start <= text_end)
   (gap_start <= gap_end)
   (! ((text_start < text_end) &&
       (gap_start < gap_end) &&
       ((text_start == gap_start) || (text_end == gap_end))))

   */

struct re_buffer {
  /* Character map applied to both the pattern and the match text
     before comparison; normally used for case-insensitive search. */
  unsigned char *translation;

  /* Describes the syntax of the match text. */
  SYNTAX_TABLE_TYPE syntax_table;

  /* Beginning of the match text.  Used only to convert match text
     pointers into indices. */
  unsigned char *text;

  /* Limits of the match text: `text_start' is inclusive, `text_end'
     is exclusive.  All matching takes place within these limits, and
     they serve as buffer-start and buffer-end for the matching
     commands that refer to them. */
  unsigned char *text_start;
  unsigned char *text_end;

  /* Limits of the gap in the match text: `gap_start' is inclusive,
     `gap_end' is exclusive.  When the text has no gap, set both to
     the same value as `text_end'. */
  unsigned char *gap_start;
  unsigned char *gap_end;
};

/* Structure to store "register" contents data in.

   Pass the address of such a structure as an argument to re_match,
   etc., if you want this information back.

   start[i] and end[i] record the string matched by \( ... \) grouping
   i, for i from 1 to RE_NREGS - 1.

   start[0] and end[0] record the entire string matched. */

/* Number of match registers: slot 0 plus one slot for each of the
   groupings 1 through RE_NREGS - 1. */
#define RE_NREGS 10

struct re_registers {
  /* start[0] is where the entire match begins; start[i] is where the
     text matched by grouping i begins. */
  long start[RE_NREGS];

  /* end[0] is where the entire match ends; end[i] is where the text
     matched by grouping i ends. */
  long end[RE_NREGS];
};

/* These are the command codes that appear in compiled regular
   expressions, one per byte.  Some command codes are followed by
   argument bytes.  A command code can specify any interpretation
   whatever for its arguments.  Zero-bytes may appear in the compiled
   regular expression. */

enum regexpcode {
  /* Code 0; by its name, not used as a command. */
  regexpcode_unused = 0,

  /* Match one literal byte, which follows the command code. */
  regexpcode_exact_1 = 1,

  /* Match n literal bytes.  The command code is followed by one byte
     giving n, then by the n literal bytes themselves. */
  regexpcode_exact_n = 2,

  /* Fail unless positioned at the beginning of a line. */
  regexpcode_line_start = 3,

  /* Fail unless positioned at the end of a line. */
  regexpcode_line_end = 4,

  /* Jump.  Followed by two bytes giving the relative address of the
     jump target. */
  regexpcode_jump = 5,

  /* Record a failure point.  Followed by two bytes giving the
     relative address of the place to resume at in case of failure. */
  regexpcode_on_failure_jump = 6,

  /* Discard the most recent failure point, then jump to address. */
  regexpcode_finalize_jump = 7,

  /* Like jump, but finalize when it is safe to do so.  Used to jump
     back to the beginning of a repeat: if the command following this
     jump is clearly incompatible with the one at the beginning of the
     repeat, so that backtracking out of already-completed repetitions
     cannot help, then finalize. */
  regexpcode_maybe_finalize_jump = 8,

  /* Jump, and push a dummy failure point.  The dummy is thrown away
     if an attempt is made to use it for a failure.  A + construct
     emits this before the first repeat. */
  regexpcode_dummy_failure_jump = 9,

  /* Match any one character. */
  regexpcode_any_char = 10,

  /* Match any one character belonging to the specified set.  The
     first following byte is the number of bitmap bytes; the bitmap
     bytes come next.  Bits within each byte are ordered
     low-bit-first, and a character is in the set if its bit is 1.
     A character too large to have a bit in the map is automatically
     not in the set. */
  regexpcode_char_set = 11,

  /* Like char_set, but match any character that is NOT one of those
     specified. */
  regexpcode_not_char_set = 12,

  /* Begin remembering the matched text in a memory register.
     Followed by one byte holding the register number, which must lie
     in the range 0 through (RE_NREGS - 1) inclusive. */
  regexpcode_start_memory = 13,

  /* Stop remembering the matched text and store it in a memory
     register.  Followed by one byte holding the register number,
     which must lie in the range 0 through (RE_NREGS - 1) inclusive. */
  regexpcode_stop_memory = 14,

  /* Match a duplicate of previously remembered text.  Followed by
     one byte holding the index of the memory register. */
  regexpcode_duplicate = 15,

  /* Succeed if positioned at the beginning of the buffer. */
  regexpcode_buffer_start = 16,

  /* Succeed if positioned at the end of the buffer. */
  regexpcode_buffer_end = 17,

  /* Match any word-constituent character. */
  regexpcode_word_char = 18,

  /* Match any character that is not a word-constituent. */
  regexpcode_not_word_char = 19,

  /* Succeed if positioned at the beginning of a word. */
  regexpcode_word_start = 20,

  /* Succeed if positioned at the end of a word. */
  regexpcode_word_end = 21,

  /* Succeed if positioned at a word boundary. */
  regexpcode_word_bound = 22,

  /* Succeed if not positioned at a word boundary. */
  regexpcode_not_word_bound = 23,

  /* Match any character having the specified syntax.  Followed by
     one byte containing a syntax code, Sword or such like. */
  regexpcode_syntax_spec = 24,

  /* Match any character whose syntax differs from the specified
     one. */
  regexpcode_not_syntax_spec = 25
};

/* Declaration of re_buffer_initialize, which returns nothing.
   Its arguments, in order, are a `struct re_buffer' pointer, an
   unsigned char pointer, a syntax table, a second unsigned char
   pointer, and four unsigned longs.
   NOTE(review): presumably these are the buffer to fill in followed
   by its translation, syntax_table and text, and then text_start,
   text_end, gap_start and gap_end given as indices relative to
   `text' -- confirm against the definition. */
extern void EXFUN
  (re_buffer_initialize,
   (struct re_buffer *,
    unsigned char *,
    SYNTAX_TABLE_TYPE,
    unsigned char *,
    unsigned long,
    unsigned long,
    unsigned long,
    unsigned long));

/* Declaration of re_compile_fastmap, which returns an int.
   Its arguments, in order, are three unsigned char pointers, a
   syntax table, and a fourth unsigned char pointer.
   NOTE(review): presumably the first two pointers delimit the
   compiled pattern, the third is a translation table, and the last
   receives the fastmap; the meaning of the return value is not
   visible from this header -- confirm against the definition. */
extern int EXFUN
  (re_compile_fastmap,
   (unsigned char *,
    unsigned char *,
    unsigned char *,
    SYNTAX_TABLE_TYPE,
    unsigned char *));

/* Declaration of re_match, which returns an int.
   Its arguments, in order, are two unsigned char pointers, the
   `struct re_buffer' describing the match text, a
   `struct re_registers' pointer through which register contents are
   reported back, and two further unsigned char pointers.
   NOTE(review): presumably the first pointer pair delimits the
   compiled pattern and the last pair delimits the region of text to
   match; the meaning of the return value is not visible from this
   header -- confirm against the definition. */
extern int EXFUN
  (re_match,
   (unsigned char *,
    unsigned char *,
    struct re_buffer *,
    struct re_registers *,
    unsigned char *,
    unsigned char *));

/* Declaration of re_search_forward, which returns an int.
   Its arguments, in order, are two unsigned char pointers, a
   `struct re_buffer' pointer, a `struct re_registers' pointer, and
   two further unsigned char pointers.
   NOTE(review): presumably the first pointer pair delimits the
   compiled pattern and the last pair delimits the region of text to
   search; the meaning of the return value is not visible from this
   header -- confirm against the definition. */
extern int EXFUN
  (re_search_forward,
   (unsigned char *,
    unsigned char *,
    struct re_buffer *,
    struct re_registers *,
    unsigned char *,
    unsigned char *));

/* Declaration of re_search_backward, which returns an int.
   Its arguments, in order, are two unsigned char pointers, a
   `struct re_buffer' pointer, a `struct re_registers' pointer, and
   two further unsigned char pointers.
   NOTE(review): presumably the first pointer pair delimits the
   compiled pattern and the last pair delimits the region of text to
   search; the meaning of the return value is not visible from this
   header -- confirm against the definition. */
extern int EXFUN
  (re_search_backward,
   (unsigned char *,
    unsigned char *,
    struct re_buffer *,
    struct re_registers *,
    unsigned char *,
    unsigned char *));