/* -*-C-*-

$Id: regex.c,v 1.20 2000/12/05 21:23:48 cph Exp $

Copyright (c) 1987-2000 Massachusetts Institute of Technology

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/* Regular expression matching and search. */

/* NOTE: This program was created by translation from the regular
expression code of GNU Emacs; it was translated from the original C to
68000 assembly language (in 1986), and then translated back from 68000
assembly language to C (in 1987).  Users should be aware that the GNU
GENERAL PUBLIC LICENSE may apply to this code.  A copy of that license
should have been included along with this file. */

#include "scheme.h"
#include "syntax.h"
#include "regex.h"

#ifdef STDC_HEADERS
#  include <stdlib.h>
#else
   extern char * malloc ();
   extern char * realloc ();
   extern void free ();
#endif

#if defined(__IRIX__) || defined(_AIX)
#define SIGN_EXTEND_CHAR(x) ((((int) (x)) >= 0x80)			\
			     ? (((int) (x)) - 0x100)			\
			     : ((int) (x)))
#endif

#ifndef SIGN_EXTEND_CHAR
#define SIGN_EXTEND_CHAR(x) (x)
#endif /* not SIGN_EXTEND_CHAR */

#ifndef SWITCH_ENUM
#define SWITCH_ENUM(enum_type, expression)				\
  switch ((enum enum_type) (expression))
#endif /* not SWITCH_ENUM */

#ifndef RE_NFAILURES
#define RE_NFAILURES 512
#endif

#define FOR_INDEX_RANGE(index, start, end)				\
  for (index = (start); (index < (end)); index += 1)

#define FOR_INDEX_BELOW(index, limit)					\
  FOR_INDEX_RANGE (index, 0, (limit))

#define FOR_ALL_ASCII(index)						\
  FOR_INDEX_BELOW (index, MAX_ASCII)

#define FOR_ALL_ASCII_SUCH_THAT(index, expression)			\
  FOR_ALL_ASCII (index)							\
    if (expression)

#define TRANSLATE_CHAR(ascii)						\
  ((translation == NULL) ? (ascii) : (translation [(ascii)]))

#define WORD_CONSTITUENT_P(ascii)					\
  (SYNTAX_CONSTITUENT_P (syntaxcode_word, (ascii)))

#define SYNTAX_CONSTITUENT_P(code, ascii)				\
  ((SYNTAX_ENTRY_CODE (SYNTAX_TABLE_REF (syntax_table, (ascii)))) == (code))

#define CHAR_SET_MEMBER_P(length, char_set, ascii)			\
  (((ascii) < ((length) * ASCII_LENGTH)) &&				\
   (CHAR_SET_MEMBER_P_INTERNAL (char_set, ascii)))

#define CHAR_SET_MEMBER_P_INTERNAL(char_set, ascii)			\
  ((((char_set) [((ascii) / ASCII_LENGTH)]) &				\
    (1 << ((ascii) % ASCII_LENGTH)))					\
   != 0)

#define READ_PATTERN_CHAR(target) do					\
{									\
  if (pattern_pc >= pattern_end)					\
    BAD_PATTERN ();							\
  (target) = (*pattern_pc++);						\
} while (0)

#define READ_PATTERN_OFFSET(target) do					\
{									\
  SIGNED char _fetched;							\
  if ((pattern_pc + 1) >= pattern_end)					\
    BAD_PATTERN ();							\
  (target) = (*pattern_pc++);						\
  _fetched = (* ((SIGNED char *) (pattern_pc++)));			\
  (target) += ((SIGN_EXTEND_CHAR (_fetched)) << ASCII_LENGTH);		\
  if (((pattern_pc + (target)) < pattern_start) ||			\
      ((pattern_pc + (target)) > pattern_end))				\
    BAD_PATTERN ();							\
} while (0)

#define READ_PATTERN_LENGTH(target) do					\
{									\
  int _len;								\
									\
  if (pattern_pc >= pattern_end)					\
    BAD_PATTERN ();							\
  _len = ((int) (*pattern_pc++));					\
  if ((pattern_pc + _len) > pattern_end)				\
    BAD_PATTERN ();							\
  (target) = _len;							\
} while (0)

#define READ_PATTERN_REGISTER(target) do				\
{									\
  if ((pattern_pc >= pattern_end) ||					\
      (((target) = (*pattern_pc++)) >= RE_NREGS))			\
    BAD_PATTERN ();							\
} while (0)

#define READ_PATTERN_SYNTAXCODE(target) do				\
{									\
  if ((pattern_pc >= pattern_end) ||					\
      (((int) ((target) = ((enum syntaxcode) (*pattern_pc++))))		\
       >= ((int) syntaxcode_max)))					\
    BAD_PATTERN ();							\
} while (0)

#define BAD_PATTERN() RE_RETURN (-2)

#define PUSH_FAILURE_POINT(pattern_pc, match_pc) do			\
{									\
  if (stack_pointer == stack_end)					\
    {									\
      long stack_length;						\
      unsigned char **stack_temporary;					\
									\
      stack_length = ((stack_end - stack_start) * 2);			\
      if (stack_length > (re_max_failures * 2))				\
	RE_RETURN (-4);							\
      stack_temporary =							\
	((unsigned char **)						\
	 (realloc							\
	  (stack_start, (stack_length * (sizeof (unsigned char *))))));	\
      if (stack_temporary == NULL)					\
	RE_RETURN (-3);							\
      stack_end = (& (stack_temporary [stack_length]));			\
      stack_pointer =							\
	(& (stack_temporary [(stack_pointer - stack_start)]));		\
      stack_start = stack_temporary;					\
    }									\
  (*stack_pointer++) = (pattern_pc);					\
  (*stack_pointer++) = (match_pc);					\
} while (0)

#define RE_RETURN(value)						\
{									\
  return_value = (value);						\
  goto return_point;							\
}

void
DEFUN (re_buffer_initialize,
       (buffer, translation, syntax_table, text,
	text_start_index, text_end_index,
	gap_start_index, gap_end_index),
       struct re_buffer * buffer
       AND unsigned char * translation
       AND SYNTAX_TABLE_TYPE syntax_table
       AND unsigned char * text
       AND unsigned long text_start_index
       AND unsigned long text_end_index
       AND unsigned long gap_start_index
       AND unsigned long gap_end_index)
{
  unsigned char *text_start, *text_end, *gap_start, *gap_end;

  /* Assumes that
     ((text_start_index <= gap_start_index) &&
      (gap_start_index <= gap_end_index) &&
      (gap_end_index <= text_end_index)) */

  text_start = (text + text_start_index);
  text_end = (text + text_end_index);
  gap_start = (text + gap_start_index);
  gap_end = (text + gap_end_index);

  (buffer -> translation) = translation;
  (buffer -> syntax_table) = syntax_table;
  (buffer -> text) = text;
  (buffer -> text_start) = ((text_start == gap_start) ? gap_end : text_start);
  (buffer -> text_end) = ((text_end == gap_end) ? gap_start : text_end);
  (buffer -> gap_start) = gap_start;
  (buffer -> gap_end) = gap_end;
  return;
}

/* Given a compiled pattern between `pattern_start' and `pattern_end',
   generate a character set which is true of all characters which can
   be the first character of a match.

   See the documentation of `struct re_buffer' for a description of
   `translation' and `syntax_table'.

   `fastmap' is the resulting character set.  It is a character array
   whose elements are either `FASTMAP_FALSE' or `FASTMAP_TRUE'.

   Return values:
   0 => pattern cannot match the null string.
   1 => pattern can match the null string.
   2 => pattern can match the null string, but only at end of match
     text or to left of a character in `fastmap'.
   -2 => the pattern is improperly formed.
   else => undefined. */

#define FASTMAP_FALSE '\0'
#define FASTMAP_TRUE '\1'

int
DEFUN (re_compile_fastmap,
       (pattern_start, pattern_end, translation, syntax_table, fastmap),
       unsigned char * pattern_start
       AND fast unsigned char * pattern_end
       AND unsigned char * translation
       AND SYNTAX_TABLE_TYPE syntax_table
       AND fast unsigned char * fastmap)
{
  fast unsigned char *pattern_pc;
  unsigned char *stack_start[RE_NFAILURES];
  unsigned char **stack_pointer;
  int return_value;

  pattern_pc = pattern_start;
  return_value = 0;
  stack_pointer = stack_start;

  {
    fast int i;

    FOR_ALL_ASCII (i)
      (fastmap [i]) = FASTMAP_FALSE;
  }

 loop:
  if (pattern_pc >= pattern_end)
    RE_RETURN (1);

  SWITCH_ENUM (regexpcode, (*pattern_pc++))
    {
    case regexpcode_unused:
    case regexpcode_line_start:
    case regexpcode_buffer_start:
    case regexpcode_buffer_end:
    case regexpcode_word_start:
    case regexpcode_word_end:
    case regexpcode_word_bound:
    case regexpcode_not_word_bound:
      goto loop;

    case regexpcode_line_end:
      {
	(fastmap [(TRANSLATE_CHAR ('\n'))]) = FASTMAP_TRUE;
	if (return_value == 0)
	  return_value = 2;
	goto next;
      }

    case regexpcode_exact_1:
      {
	fast int ascii;

	READ_PATTERN_CHAR (ascii);
	(fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
	goto next;
      }

    case regexpcode_exact_n:
      {
	fast int length;

	READ_PATTERN_LENGTH (length);
	if (length == 0)
	  goto loop;
	(fastmap [(TRANSLATE_CHAR (pattern_pc [0]))]) = FASTMAP_TRUE;
	goto next;
      }

    case regexpcode_any_char:
      {
	fast int ascii;

	FOR_ALL_ASCII_SUCH_THAT (ascii, (ascii != '\n'))
	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
	if (return_value != 0)
	  goto return_point;
	goto next;
      }

    case regexpcode_char_set:
      {
	fast int length;
	fast int ascii;

	READ_PATTERN_LENGTH (length);
	length = (length * ASCII_LENGTH);
	FOR_INDEX_BELOW (ascii, length)
	  if (CHAR_SET_MEMBER_P_INTERNAL (pattern_pc, ascii))
	    (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
	goto next;
      }

    case regexpcode_not_char_set:
      {
	fast int length;
	fast int ascii;

	READ_PATTERN_LENGTH (length);
	length = (length * ASCII_LENGTH);
	FOR_INDEX_BELOW (ascii, length)
	  if (! (CHAR_SET_MEMBER_P_INTERNAL (pattern_pc, ascii)))
	    (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
	FOR_INDEX_RANGE (ascii, length, MAX_ASCII)
	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
	goto next;
      }

    case regexpcode_word_char:
      {
	fast int ascii;

	FOR_ALL_ASCII_SUCH_THAT (ascii, (WORD_CONSTITUENT_P (ascii)))
	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
	goto next;
      }

    case regexpcode_not_word_char:
      {
	fast int ascii;

	FOR_ALL_ASCII_SUCH_THAT (ascii, (! (WORD_CONSTITUENT_P (ascii))))
	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
	goto next;
      }

    case regexpcode_syntax_spec:
      {
	fast enum syntaxcode code;
	fast int ascii;

	READ_PATTERN_SYNTAXCODE (code);
	FOR_ALL_ASCII_SUCH_THAT (ascii, (SYNTAX_CONSTITUENT_P (code, ascii)))
	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
	goto next;
      }

    case regexpcode_not_syntax_spec:
      {
	fast enum syntaxcode code;
	fast int ascii;

	READ_PATTERN_SYNTAXCODE (code);
	FOR_ALL_ASCII_SUCH_THAT (ascii,
				 (! (SYNTAX_CONSTITUENT_P (code, ascii))))
	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
	goto next;
      }

    case regexpcode_start_memory:
    case regexpcode_stop_memory:
      {
	fast int register_number;

	READ_PATTERN_REGISTER (register_number);
	goto loop;
      }

    case regexpcode_duplicate:
      {
	fast int register_number;
	fast int ascii;

	READ_PATTERN_REGISTER (register_number);
	FOR_ALL_ASCII (ascii)
	  (fastmap [ascii]) = FASTMAP_TRUE;
	RE_RETURN (1);
      }

    case regexpcode_jump:
    case regexpcode_finalize_jump:
    case regexpcode_maybe_finalize_jump:
    case regexpcode_dummy_failure_jump:
      {
	fast int offset;

	return_value = 1;
	READ_PATTERN_OFFSET (offset);
	pattern_pc += offset;
	if (offset > 0)
	  goto loop;

	/* Jump backward reached implies we just went through the
	   body of a loop and matched nothing.  Opcode jumped to
	   should be an on_failure_jump.  Just treat it like an
	   ordinary jump.  For a * loop, it has pushed its failure
	   point already; if so, discard that as redundant. */
	if (pattern_pc >= pattern_end)
	  BAD_PATTERN ();
	if (((enum regexpcode) (pattern_pc [0])) !=
	    regexpcode_on_failure_jump)
	  goto loop;
	READ_PATTERN_OFFSET (offset);
	pattern_pc += offset;
	if ((stack_pointer != stack_start) &&
	    ((stack_pointer [-1]) == pattern_pc))
	  stack_pointer -= 1;
	goto loop;
      }

    case regexpcode_on_failure_jump:
      {
	fast int offset;

	READ_PATTERN_OFFSET (offset);
	(*stack_pointer++) = (pattern_pc + offset);
	goto loop;
      }

    default:
      BAD_PATTERN ();
    }

 next:
  if (stack_pointer != stack_start)
    {
      pattern_pc = (*--stack_pointer);
      goto loop;
    }

 return_point:
  return (return_value);
}

/* Match the compiled pattern described by `pattern_start' and
   `pattern_end' against the characters in `buffer' between
   `match_start' and `match_end'.

   `registers', if not NULL, will be filled with the start and end
   indices of the match registers if the match succeeds.

   It is assumed that the following is true:

   (! ((gap_start < gap_end) &&
       (match_start < match_end) &&
       ((match_start == gap_start) || (match_end == gap_end))))

   Return values:

   non-negative => the end index (exclusive) of the match.
   -1 => no match.
   -2 => the pattern is badly formed.
   -3 => memory allocation error.
   -4 => match stack overflow.
   other => undefined. */

#define RE_MATCH_FAILED (-1)

/* This macro is used by search procedures to decide when a match at a
   particular place has failed.  If true, the search continues by
   advancing to the next match point.  */
#define RE_MATCH_FAILURE_RESULT(result)					\
  (((result) == RE_MATCH_FAILED) || ((result) == (-4)))

#define ADDRESS_TO_INDEX(address)					\
  ((((address) > gap_start) ? ((address) - gap_length) : (address))	\
   - (buffer -> text))

#define READ_MATCH_CHAR(target) do					\
{									\
  if (match_pc >= match_end)						\
    goto re_match_fail;							\
  (target) = (TRANSLATE_CHAR (*match_pc++));				\
  if (match_pc == gap_start)						\
    match_pc = gap_end;							\
} while (0)

static Boolean
DEFUN (beq_translate, (scan1, scan2, length, translation),
       unsigned char * scan1 AND
       unsigned char * scan2 AND
       long length AND
       unsigned char * translation)
{
  while ((length--) > 0)
    if ((TRANSLATE_CHAR (*scan1++)) != (TRANSLATE_CHAR (*scan2++)))
      return (false);
  return (true);
}

int re_max_failures = 1000;

int
DEFUN (re_match,
       (pattern_start, pattern_end, buffer, registers, match_start, match_end),
       unsigned char * pattern_start
       AND unsigned char * pattern_end
       AND struct re_buffer * buffer
       AND struct re_registers * registers
       AND unsigned char * match_start
       AND unsigned char * match_end)
{
  fast unsigned char *pattern_pc, *match_pc;
  unsigned char *gap_start, *gap_end;
  unsigned char *translation;
  SYNTAX_TABLE_TYPE syntax_table;
  long gap_length;
  int return_value;

  /* Failure point stack.  Each place that can handle a failure
     further down the line pushes a failure point on this stack.  It
     consists of two char *'s.  The first one pushed is where to
     resume scanning the pattern; the second pushed is where to resume
     scanning the match text.  If the latter is NULL, the failure
     point is a "dummy".  If a failure happens and the innermost
     failure point is dormant, it discards that failure point and
     tries the next one. */

  unsigned char **stack_start, **stack_end, **stack_pointer;

  /* Information on the "contents" of registers.  These are pointers
     into the match text; they record just what was matched (on this
     attempt) by some part of the pattern.  The start_memory command
     stores the start of a register's contents and the stop_memory
     command stores the end.

     At that point, (register_start [regnum]) points to the first
     character in the register, and (register_end [regnum]) points to
     the first character beyond the end of the register. */

  unsigned char *register_start[RE_NREGS];
  unsigned char *register_end[RE_NREGS];

  pattern_pc = pattern_start;
  match_pc = match_start;
  gap_start = (buffer -> gap_start);
  gap_end = (buffer -> gap_end);
  gap_length = (gap_end - gap_start);
  translation = (buffer -> translation);
  syntax_table = (buffer -> syntax_table);

  stack_start =
    ((unsigned char **) (malloc ((2 * RE_NFAILURES) * (sizeof (char *)))));
  if (stack_start == NULL)
    RE_RETURN (-3);

  stack_end = (& (stack_start [(2 * RE_NFAILURES)]));
  stack_pointer = stack_start;

  {
    fast int i;

    FOR_INDEX_BELOW (i, RE_NREGS)
      {
	(register_start [i]) = NULL;
	(register_end [i]) = NULL;
      }
  }

 re_match_loop:
  if (pattern_pc >= pattern_end)
    {
      /* Reaching here indicates that match was successful. */
      if (registers != NULL)
	{
	  fast int i;

	  (register_start [0]) = match_start;
	  (register_end [0]) = match_pc;
	  FOR_INDEX_BELOW (i, RE_NREGS)
	    {
	      ((registers -> start) [i]) =
		(((register_start [i]) == NULL)
		 ? -1
		 : (ADDRESS_TO_INDEX (register_start [i])));
	      ((registers -> end) [i]) =
		(((register_end [i]) == NULL)
		 ? -1
		 : (ADDRESS_TO_INDEX (register_end [i])));
	    }
	}
      RE_RETURN (ADDRESS_TO_INDEX (match_pc));
    }

  SWITCH_ENUM (regexpcode, (*pattern_pc++))
    {
    case regexpcode_unused:
      goto re_match_loop;

    case regexpcode_exact_1:
      {
	fast int ascii;
	fast int ascii_p;

	READ_MATCH_CHAR (ascii);
	READ_PATTERN_CHAR (ascii_p);
	if (ascii == ascii_p)
	  goto re_match_loop;
	goto re_match_fail;
      }

    case regexpcode_exact_n:
      {
	fast int length;
	fast int ascii;

	READ_PATTERN_LENGTH (length);
	while ((length--) > 0)
	  {
	    READ_MATCH_CHAR (ascii);
	    if (ascii != (*pattern_pc++))
	      goto re_match_fail;
	  }
	goto re_match_loop;
      }

    case regexpcode_any_char:
      {
	fast int ascii;

	READ_MATCH_CHAR (ascii);
	if (ascii == '\n')
	  goto re_match_fail;
	goto re_match_loop;
      }

#define RE_MATCH_CHAR_SET(winning_label, losing_label)			\
      {									\
	fast int ascii;							\
	fast int length;						\
									\
	READ_MATCH_CHAR (ascii);					\
	READ_PATTERN_LENGTH (length);					\
	if (CHAR_SET_MEMBER_P (length, pattern_pc, ascii))		\
	  {								\
	    pattern_pc += length;					\
	    goto winning_label;						\
	  }								\
	else								\
	  {								\
	    pattern_pc += length;					\
	    goto losing_label;						\
	  }								\
      }

    case regexpcode_char_set:
      RE_MATCH_CHAR_SET (re_match_loop, re_match_fail);

    case regexpcode_not_char_set:
      RE_MATCH_CHAR_SET (re_match_fail, re_match_loop);

#undef RE_MATCH_CHAR_SET

    /* \( is represented by a start_memory, \) by a stop_memory.  Both
       of those commands contain a "register number" argument.  The
       text matched within the \( and \) is recorded under that
       number.  Then, \<digit> turns into a `duplicate' command which
       is followed by the numeric value of <digit> as the register
       number. */

    case regexpcode_start_memory:
      {
	fast int register_number;

	READ_PATTERN_REGISTER (register_number);
	(register_start [register_number]) = match_pc;
	goto re_match_loop;
      }

    case regexpcode_stop_memory:
      {
	fast int register_number;

	READ_PATTERN_REGISTER (register_number);
	(register_end [register_number]) =
	  ((match_pc == gap_end) ? gap_start : match_pc);
	goto re_match_loop;
      }

    case regexpcode_duplicate:
      {
	fast int register_number;
	unsigned char *start, *end, *new_end;
	long length;

	READ_PATTERN_REGISTER (register_number);
	start = (register_start [register_number]);
	end = (register_end [register_number]);
	length = (end - start);
	if (length <= 0)
	  goto re_match_loop;
	new_end = (match_pc + length);
	if (new_end > match_end)
	  goto re_match_fail;
	if ((match_pc <= gap_start) && (new_end > gap_start))
	  {
	    long length1, length2;

	    new_end += gap_length;
	    if (new_end > match_end)
	      goto re_match_fail;
	    length1 = (gap_start - match_pc);
	    length2 = (length - length1);
	    if (!
		((beq_translate (match_pc, start, length1, translation)) &&
		 (beq_translate (gap_end, (start + length1), length2,
				 translation))))
	      goto re_match_fail;
	  }
	else if ((start <= gap_start) && (end > gap_start))
	  {
	    long length1, length2;

	    length1 = (gap_start - start);
	    length2 = (end - gap_end);
	    if (!
		((beq_translate (match_pc, start, length1, translation)) &&
		 (beq_translate ((match_pc + length1), gap_end, length2,
				 translation))))
	      goto re_match_fail;
	  }
	else
	  {
	    if (! (beq_translate (match_pc, start, length, translation)))
	      goto re_match_fail;
	  }
	match_pc = ((new_end == gap_start) ? gap_end : new_end);
	goto re_match_loop;
      }

    case regexpcode_buffer_start:
      {
	if ((ADDRESS_TO_INDEX (match_pc))
	    == (ADDRESS_TO_INDEX (buffer -> text_start)))
	  goto re_match_loop;
	goto re_match_fail;
      }

    case regexpcode_buffer_end:
      {
	if ((ADDRESS_TO_INDEX (match_pc))
	    == (ADDRESS_TO_INDEX (buffer -> text_end)))
	  goto re_match_loop;
	goto re_match_fail;
      }

    case regexpcode_line_start:
      {
	if ((ADDRESS_TO_INDEX (match_pc))
	    == (ADDRESS_TO_INDEX (buffer -> text_start)))
	  goto re_match_loop;
	if ((TRANSLATE_CHAR
	     (((match_pc == gap_end) ? gap_start : match_pc) [-1]))
	    == '\n')
	  goto re_match_loop;
	goto re_match_fail;
      }

    case regexpcode_line_end:
      {
	if (((ADDRESS_TO_INDEX (match_pc))
	     == (ADDRESS_TO_INDEX (buffer -> text_end)))
	    || ((TRANSLATE_CHAR (match_pc [0])) == '\n'))
	  goto re_match_loop;
	goto re_match_fail;
      }

#define RE_MATCH_WORD_BOUND(word_bound_p)				\
  if ((match_pc == gap_end)						\
      ? (word_bound_p							\
	 (((gap_start != (buffer -> text_start))			\
	   && (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_start[-1])))),	\
	  ((gap_end != (buffer -> text_end))				\
	   && (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_end[0]))))))	\
      : (word_bound_p							\
	 (((match_pc != (buffer -> text_start))				\
	   && (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc[-1])))),	\
	  ((match_pc != (buffer -> text_end))				\
	   && (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc[0])))))))	\
      goto re_match_loop;						\
    goto re_match_fail

    case regexpcode_word_bound:
#define WORD_BOUND_P(left_p, right_p) ((left_p) != (right_p))
      RE_MATCH_WORD_BOUND (WORD_BOUND_P);
#undef WORD_BOUND_P

    case regexpcode_not_word_bound:
#define NOT_WORD_BOUND_P(left_p, right_p) ((left_p) == (right_p))
      RE_MATCH_WORD_BOUND (NOT_WORD_BOUND_P);
#undef NOT_WORD_BOUND_P

    case regexpcode_word_start:
#define WORD_START_P(left_p, right_p) ((! (left_p)) && (right_p))
      RE_MATCH_WORD_BOUND (WORD_START_P);
#undef WORD_START_P

    case regexpcode_word_end:
#define WORD_END_P(left_p, right_p) ((left_p) && (! (right_p)))
      RE_MATCH_WORD_BOUND (WORD_END_P);
#undef WORD_END_P

#undef RE_MATCH_WORD_BOUND

    case regexpcode_syntax_spec:
      {
	fast int ascii;
	fast enum syntaxcode code;

	READ_MATCH_CHAR (ascii);
	READ_PATTERN_SYNTAXCODE (code);
	if (SYNTAX_CONSTITUENT_P (code, ascii))
	  goto re_match_loop;
	goto re_match_fail;
      }

    case regexpcode_not_syntax_spec:
      {
	fast int ascii;
	fast enum syntaxcode code;

	READ_MATCH_CHAR (ascii);
	READ_PATTERN_SYNTAXCODE (code);
	if (! (SYNTAX_CONSTITUENT_P (code, ascii)))
	  goto re_match_loop;
	goto re_match_fail;
      }

    case regexpcode_word_char:
      {
	fast int ascii;

	READ_MATCH_CHAR (ascii);
	if (WORD_CONSTITUENT_P (ascii))
	  goto re_match_loop;
	goto re_match_fail;
      }

    case regexpcode_not_word_char:
      {
	fast int ascii;

	READ_MATCH_CHAR (ascii);
	if (! (WORD_CONSTITUENT_P (ascii)))
	  goto re_match_loop;
	goto re_match_fail;
      }

    /* "or" constructs ("|") are handled by starting each alternative
       with an on_failure_jump that points to the start of the next
       alternative.  Each alternative except the last ends with a jump
       to the joining point.  (Actually, each jump except for the last
       one really jumps to the following jump, because tensioning the
       jumps is a hassle.)

       The start of a stupid repeat has an on_failure_jump that points
       past the end of the repeat text.  This makes a failure point so
       that, on failure to match a repetition, matching restarts past
       as many repetitions have been found with no way to fail and
       look for another one.

       A smart repeat is similar but loops back to the on_failure_jump
       so that each repetition makes another failure point. */

    case regexpcode_on_failure_jump:
      {
	fast long offset;

	READ_PATTERN_OFFSET (offset);
	PUSH_FAILURE_POINT ((pattern_pc + offset), match_pc);
	goto re_match_loop;
      }

    /* The end of a smart repeat has a maybe_finalize_jump back.
       Change it either to a finalize_jump or an ordinary jump. */

    case regexpcode_maybe_finalize_jump:
      {
	fast long offset;
	fast long ascii;

	READ_PATTERN_OFFSET (offset);
	if (pattern_pc == pattern_end)
	  goto finalize_jump;

	/* Compare what follows with the beginning of the repeat.
	   If we can establish that there is nothing that they
	   would both match, we can change to `finalize_jump'. */

	SWITCH_ENUM (regexpcode, (pattern_pc [0]))
	  {
	  case regexpcode_exact_1:
	    ascii = (pattern_pc [1]);
	    break;

	  case regexpcode_exact_n:
	    ascii = (pattern_pc [2]);
	    break;

	  case regexpcode_line_end:
	    ascii = ('\n');
	    break;

	  default:
	    goto dont_finalize_jump;
	  }

	/* (pattern_pc [(offset - 3)]) is an `on_failure_jump'.
	   Examine what follows that. */
	SWITCH_ENUM (regexpcode, (pattern_pc [offset]))
	  {
	  case regexpcode_exact_1:
	    {
	      if (ascii != (pattern_pc [(offset + 1)]))
		goto finalize_jump;
	      goto dont_finalize_jump;
	    }

	  case regexpcode_exact_n:
	    {
	      if (ascii != (pattern_pc [(offset + 2)]))
		goto finalize_jump;
	      goto dont_finalize_jump;
	    }

	  case regexpcode_char_set:
	    {
	      if (CHAR_SET_MEMBER_P ((pattern_pc [(offset + 1)]),
				     (& (pattern_pc [(offset + 2)])),
				     ascii))
		goto dont_finalize_jump;
	      goto finalize_jump;
	    }

	  case regexpcode_not_char_set:
	    {
	      if (CHAR_SET_MEMBER_P ((pattern_pc [(offset + 1)]),
				     (& (pattern_pc [(offset + 2)])),
				     ascii))
		goto finalize_jump;
	      goto dont_finalize_jump;
	    }

	  default:
	    goto dont_finalize_jump;
	  }

      finalize_jump:
	pattern_pc -= 2;
	(pattern_pc [-1]) = ((unsigned char) regexpcode_finalize_jump);
	goto re_match_finalize_jump;

      dont_finalize_jump:
	pattern_pc -= 2;
	(pattern_pc [-1]) = ((unsigned char) regexpcode_jump);
	goto re_match_jump;
      }

    case regexpcode_finalize_jump:
    re_match_finalize_jump:
      {
	stack_pointer -= 2;
	goto re_match_jump;
      }

    case regexpcode_jump:
    re_match_jump:
      {
	fast long offset;

	READ_PATTERN_OFFSET (offset);
	pattern_pc += offset;
	goto re_match_loop;
      }

    case regexpcode_dummy_failure_jump:
      {
	PUSH_FAILURE_POINT (NULL, NULL);
	goto re_match_jump;
      }

    default:
      {
	BAD_PATTERN ();
      }
    }

 re_match_fail:
  if (stack_pointer == stack_start)
    RE_RETURN (RE_MATCH_FAILED);
  match_pc = (*--stack_pointer);
  pattern_pc = (*--stack_pointer);
  if (pattern_pc != NULL)
    goto re_match_loop;
  goto re_match_fail;

 return_point:
  if (stack_start != NULL)
    free (stack_start);
  return (return_value);
}

#define DEFINE_RE_SEARCH(name)						\
int									\
DEFUN (name,								\
       (pattern_start, pattern_end, buffer, registers,			\
	match_start, match_end),					\
       unsigned char * pattern_start					\
       AND unsigned char * pattern_end					\
       AND struct re_buffer * buffer					\
       AND struct re_registers * registers				\
       AND unsigned char * match_start					\
       AND unsigned char * match_end)

#define INITIALIZE_RE_SEARCH(pc, limit, gap_limit)			\
  int can_be_null;							\
  unsigned char *translation;						\
  int match_result;							\
									\
  fast unsigned char *match_pc;						\
  fast unsigned char *match_limit;					\
  fast unsigned char *gap_limit;					\
  fast unsigned char *fastmap;						\
  unsigned char fastmap_array[MAX_ASCII];				\
									\
  fastmap = &fastmap_array[0];						\
  translation = (buffer -> translation);				\
  can_be_null =								\
    (re_compile_fastmap							\
     (pattern_start, pattern_end, translation,				\
      (buffer -> syntax_table), fastmap));				\
  if (can_be_null < 0)							\
    return (can_be_null);						\
									\
  match_pc = (pc);							\
  match_limit = (limit);						\
  gap_limit = (buffer -> gap_limit)

#define RE_SEARCH_TEST(start)						\
  (re_match								\
   (pattern_start, pattern_end, buffer, registers, (start), match_end))

#define RE_SEARCH_FORWARD_FAST(limit) do				\
{									\
  while (true)								\
    {									\
      if (match_pc >= (limit))						\
	break;								\
									\
      if ((fastmap [(TRANSLATE_CHAR (*match_pc++))]) == FASTMAP_FALSE)	\
	continue;							\
									\
      match_result = (RE_SEARCH_TEST (match_pc - 1));			\
      if (RE_MATCH_FAILURE_RESULT (match_result))			\
	continue;							\
									\
      return (match_result);						\
    }									\
} while (0)

DEFINE_RE_SEARCH (re_search_forward)
{
  INITIALIZE_RE_SEARCH (match_start, match_end, gap_start);

  if (can_be_null != 1)
    {
      if ((match_pc < gap_start) && (gap_start < match_limit))
	RE_SEARCH_FORWARD_FAST (gap_start);
      if (match_pc == gap_start)
	match_pc = (buffer -> gap_end);
      RE_SEARCH_FORWARD_FAST (match_limit);
      return
	((can_be_null == 0)
	 ? RE_MATCH_FAILED
	 : (RE_SEARCH_TEST (match_limit)));
    }
  else
    {
      while (true)
	{
	  match_result = (RE_SEARCH_TEST (match_pc));
	  if (! (RE_MATCH_FAILURE_RESULT (match_result)))
	    return (match_result);
	  match_pc += 1;
	  if (match_pc == gap_start)
	    match_pc = (buffer -> gap_end);
	  if (match_pc > match_limit)
	    return (match_result);
	}
    }
}

#define RE_SEARCH_BACKWARD_FAST(limit) do				\
{									\
  while (true)								\
    {									\
      if (match_pc <= (limit))						\
	break;								\
									\
      if ((fastmap [(TRANSLATE_CHAR (*--match_pc))]) == FASTMAP_FALSE)	\
	continue;							\
									\
      match_result = (RE_SEARCH_TEST (match_pc));			\
      if (RE_MATCH_FAILURE_RESULT (match_result))			\
	continue;							\
									\
      RE_SEARCH_BACKWARD_RETURN (match_pc);				\
    }									\
} while (0)

#define RE_SEARCH_BACKWARD_RETURN(start)				\
  return								\
    ((match_result < 0)							\
     ? match_result							\
     : ((((start) > (buffer -> gap_start))				\
	 ? ((start) - (gap_end - (buffer -> gap_start)))		\
	 : (start))							\
	- (buffer -> text)))

DEFINE_RE_SEARCH (re_search_backward)
{
  INITIALIZE_RE_SEARCH (match_end, match_start, gap_end);

  if (can_be_null != 1)
    {
      if ((match_pc > gap_end) && (gap_end > match_limit))
	RE_SEARCH_BACKWARD_FAST (gap_end);
      if (match_pc == gap_end)
	match_pc = (buffer -> gap_start);
      RE_SEARCH_BACKWARD_FAST (match_limit);
      if (can_be_null == 0)
	return (RE_MATCH_FAILED);
      match_result = (RE_SEARCH_TEST (match_limit));
      RE_SEARCH_BACKWARD_RETURN (match_limit);
    }
  else
    {
      while (true)
	{
	  match_result = (RE_SEARCH_TEST (match_pc));
	  if (! (RE_MATCH_FAILURE_RESULT (match_result)))
	    RE_SEARCH_BACKWARD_RETURN (match_pc);
	  if (match_pc == gap_end)
	    match_pc = (buffer -> gap_start);
	  match_pc -= 1;
	  if (match_pc < match_limit)
	    RE_SEARCH_BACKWARD_RETURN (match_pc);
	}
    }
}


syntax highlighted by Code2HTML, v. 0.9.1