/* Copyright (C) 1995, 1996 Tom Lord
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Library General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this software; see the file COPYING. If not, write to
* the Free Software Foundation, 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
/*
* Tom Lord (lord@cygnus.com, lord@gnu.ai.mit.edu)
*/
#include "rxall.h"
#include "rxnfa.h"
#define remalloc(M, S) (M ? realloc (M, S) : malloc (S))
/* {Low Level Data Structure Code}
*/
/* Constructs a new nfa node. */
#ifdef __STDC__
struct rx_nfa_state *
rx_nfa_state (struct rx *rx)
#else
struct rx_nfa_state *
rx_nfa_state (rx)
struct rx *rx;
#endif
{
struct rx_nfa_state * n = (struct rx_nfa_state *)malloc (sizeof (*n));
if (!n)
return 0;
rx_bzero ((char *)n, sizeof (*n));
n->next = rx->nfa_states;
rx->nfa_states = n;
return n;
}
#ifdef __STDC__
static void
rx_free_nfa_state (struct rx_nfa_state * n)
#else
static void
rx_free_nfa_state (n)
struct rx_nfa_state * n;
#endif
{
free ((char *)n);
}
/* This adds an edge between two nodes, but doesn't initialize the
* edge label.
*/
#ifdef __STDC__
struct rx_nfa_edge *
rx_nfa_edge (struct rx *rx,
enum rx_nfa_etype type,
struct rx_nfa_state *start,
struct rx_nfa_state *dest)
#else
struct rx_nfa_edge *
rx_nfa_edge (rx, type, start, dest)
struct rx *rx;
enum rx_nfa_etype type;
struct rx_nfa_state *start;
struct rx_nfa_state *dest;
#endif
{
struct rx_nfa_edge *e;
e = (struct rx_nfa_edge *)malloc (sizeof (*e));
if (!e)
return 0;
e->next = start->edges;
start->edges = e;
e->type = type;
e->dest = dest;
return e;
}
#ifdef __STDC__
static void
rx_free_nfa_edge (struct rx_nfa_edge * e)
#else
static void
rx_free_nfa_edge (e)
struct rx_nfa_edge * e;
#endif
{
free ((char *)e);
}
/* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure
* of an NFA. These are added to an nfa automaticly by eclose_nfa.
*/
#ifdef __STDC__
static struct rx_possible_future *
rx_possible_future (struct rx * rx,
struct rx_se_list * effects)
#else
static struct rx_possible_future *
rx_possible_future (rx, effects)
struct rx * rx;
struct rx_se_list * effects;
#endif
{
struct rx_possible_future *ec;
ec = (struct rx_possible_future *) malloc (sizeof (*ec));
if (!ec)
return 0;
ec->destset = 0;
ec->next = 0;
ec->effects = effects;
return ec;
}
#ifdef __STDC__
static void
rx_free_possible_future (struct rx_possible_future * pf)
#else
static void
rx_free_possible_future (pf)
struct rx_possible_future * pf;
#endif
{
free ((char *)pf);
}
#ifdef __STDC__
static void
rx_free_nfa_graph (struct rx *rx)
#else
static void
rx_free_nfa_graph (rx)
struct rx *rx;
#endif
{
while (rx->nfa_states)
{
while (rx->nfa_states->edges)
{
switch (rx->nfa_states->edges->type)
{
case ne_cset:
rx_free_cset (rx->nfa_states->edges->params.cset);
break;
default:
break;
}
{
struct rx_nfa_edge * e;
e = rx->nfa_states->edges;
rx->nfa_states->edges = rx->nfa_states->edges->next;
rx_free_nfa_edge (e);
}
} /* while (rx->nfa_states->edges) */
{
/* Iterate over the partial epsilon closures of rx->nfa_states */
struct rx_possible_future * pf = rx->nfa_states->futures;
while (pf)
{
struct rx_possible_future * pft = pf;
pf = pf->next;
rx_free_possible_future (pft);
}
}
{
struct rx_nfa_state *n;
n = rx->nfa_states;
rx->nfa_states = rx->nfa_states->next;
rx_free_nfa_state (n);
}
}
}
/* {Translating a Syntax Tree into an NFA}
*
*/
/* This is the Thompson regexp->nfa algorithm.
* It is modified to allow for `side-effect epsilons.' Those are
* edges that are taken whenever a similarly placed epsilon edge
* would be, but which also imply that some side effect occurs
* when the edge is taken.
*
* Side effects are used to model parts of the pattern langauge
* that are not regular.
*/
#ifdef __STDC__
int
rx_build_nfa (struct rx *rx,
struct rexp_node *rexp,
struct rx_nfa_state **start,
struct rx_nfa_state **end)
#else
int
rx_build_nfa (rx, rexp, start, end)
struct rx *rx;
struct rexp_node *rexp;
struct rx_nfa_state **start;
struct rx_nfa_state **end;
#endif
{
struct rx_nfa_edge *edge;
/* Start & end nodes may have been allocated by the caller. */
*start = *start ? *start : rx_nfa_state (rx);
if (!*start)
return 0;
if (!rexp)
{
*end = *start;
return 1;
}
*end = *end ? *end : rx_nfa_state (rx);
if (!*end)
{
rx_free_nfa_state (*start);
return 0;
}
switch (rexp->type)
{
case r_cset:
edge = rx_nfa_edge (rx, ne_cset, *start, *end);
(*start)->has_cset_edges = 1;
if (!edge)
return 0;
edge->params.cset = rx_copy_cset (rx->local_cset_size,
rexp->params.cset);
if (!edge->params.cset)
{
rx_free_nfa_edge (edge);
return 0;
}
return 1;
case r_string:
{
if (rexp->params.cstr.len == 1)
{
edge = rx_nfa_edge (rx, ne_cset, *start, *end);
(*start)->has_cset_edges = 1;
if (!edge)
return 0;
edge->params.cset = rx_cset (rx->local_cset_size);
if (!edge->params.cset)
{
rx_free_nfa_edge (edge);
return 0;
}
RX_bitset_enjoin (edge->params.cset, rexp->params.cstr.contents[0]);
return 1;
}
else
{
struct rexp_node copied;
struct rx_nfa_state * shared;
copied = *rexp;
shared = 0;
copied.params.cstr.len--;
copied.params.cstr.contents++;
if (!rx_build_nfa (rx, &copied, &shared, end))
return 0;
copied.params.cstr.len = 1;
copied.params.cstr.contents--;
return rx_build_nfa (rx, &copied, start, &shared);
}
}
case r_opt:
return (rx_build_nfa (rx, rexp->params.pair.left, start, end)
&& rx_nfa_edge (rx, ne_epsilon, *start, *end));
case r_plus:
{
struct rx_nfa_state * star_start = 0;
struct rx_nfa_state * star_end = 0;
struct rx_nfa_state * shared;
shared = 0;
if (!rx_build_nfa (rx, rexp->params.pair.left, start, &shared))
return 0;
return (rx_build_nfa (rx, rexp->params.pair.left,
&star_start, &star_end)
&& star_start
&& star_end
&& rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
&& rx_nfa_edge (rx, ne_epsilon, shared, star_start)
&& rx_nfa_edge (rx, ne_epsilon, star_end, *end)
&& rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
}
case r_interval:
case r_star:
{
struct rx_nfa_state * star_start = 0;
struct rx_nfa_state * star_end = 0;
return (rx_build_nfa (rx, rexp->params.pair.left,
&star_start, &star_end)
&& star_start
&& star_end
&& rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
&& rx_nfa_edge (rx, ne_epsilon, *start, star_start)
&& rx_nfa_edge (rx, ne_epsilon, star_end, *end)
&& rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
}
case r_cut:
{
struct rx_nfa_state * cut_end = 0;
cut_end = rx_nfa_state (rx);
if (!(cut_end && rx_nfa_edge (rx, ne_epsilon, *start, cut_end)))
{
rx_free_nfa_state (*start);
rx_free_nfa_state (*end);
if (cut_end)
rx_free_nfa_state (cut_end);
return 0;
}
cut_end->is_final = rexp->params.intval;
return 1;
}
case r_parens:
return rx_build_nfa (rx, rexp->params.pair.left, start, end);
case r_concat:
{
struct rx_nfa_state *shared = 0;
return
(rx_build_nfa (rx, rexp->params.pair.left, start, &shared)
&& rx_build_nfa (rx, rexp->params.pair.right, &shared, end));
}
case r_alternate:
{
struct rx_nfa_state *ls = 0;
struct rx_nfa_state *le = 0;
struct rx_nfa_state *rs = 0;
struct rx_nfa_state *re = 0;
return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le)
&& rx_build_nfa (rx, rexp->params.pair.right, &rs, &re)
&& rx_nfa_edge (rx, ne_epsilon, *start, ls)
&& rx_nfa_edge (rx, ne_epsilon, *start, rs)
&& rx_nfa_edge (rx, ne_epsilon, le, *end)
&& rx_nfa_edge (rx, ne_epsilon, re, *end));
}
case r_context:
edge = rx_nfa_edge (rx, ne_side_effect, *start, *end);
if (!edge)
return 0;
edge->params.side_effect = (void *)rexp->params.intval;
return 1;
}
/* this should never happen */
return 0;
}
/* {Low Level Data Structures for the Static NFA->SuperNFA Analysis}
*
* There are side effect lists -- lists of side effects occuring
* along an uninterrupted, acyclic path of side-effect epsilon edges.
* Such paths are collapsed to single edges in the course of computing
* epsilon closures. The resulting single edges are labled with a list
* of all the side effects from the original multi-edge path. Equivalent
* lists of side effects are made == by the constructors below.
*
* There are also nfa state sets. These are used to hold a list of all
* states reachable from a starting state for a given type of transition
* and side effect list. These are also hash-consed.
*/
/* The next several functions compare, construct, etc. lists of side
* effects. See ECLOSE_NFA (below) for details.
*/
/* Ordering of rx_se_list
* (-1, 0, 1 return value convention).
*/
#ifdef __STDC__
static int
se_list_cmp (void * va, void * vb)
#else
static int
se_list_cmp (va, vb)
void * va;
void * vb;
#endif
{
struct rx_se_list * a = (struct rx_se_list *)va;
struct rx_se_list * b = (struct rx_se_list *)vb;
return ((va == vb)
? 0
: (!va
? -1
: (!vb
? 1
: ((long)a->car < (long)b->car
? 1
: ((long)a->car > (long)b->car
? -1
: se_list_cmp ((void *)a->cdr, (void *)b->cdr))))));
}
#ifdef __STDC__
static int
se_list_equal (void * va, void * vb)
#else
static int
se_list_equal (va, vb)
void * va;
void * vb;
#endif
{
return !(se_list_cmp (va, vb));
}
static struct rx_hash_rules se_list_hash_rules = { se_list_equal, 0, 0, 0, 0 };
#ifdef __STDC__
static struct rx_se_list *
side_effect_cons (struct rx * rx,
void * se, struct rx_se_list * list)
#else
static struct rx_se_list *
side_effect_cons (rx, se, list)
struct rx * rx;
void * se;
struct rx_se_list * list;
#endif
{
struct rx_se_list * l;
l = ((struct rx_se_list *) malloc (sizeof (*l)));
if (!l)
return 0;
l->car = se;
l->cdr = list;
return l;
}
#ifdef __STDC__
static struct rx_se_list *
hash_cons_se_prog (struct rx * rx,
struct rx_hash * memo,
void * car, struct rx_se_list * cdr)
#else
static struct rx_se_list *
hash_cons_se_prog (rx, memo, car, cdr)
struct rx * rx;
struct rx_hash * memo;
void * car;
struct rx_se_list * cdr;
#endif
{
long hash = (long)car ^ (long)cdr;
struct rx_se_list template;
template.car = car;
template.cdr = cdr;
{
struct rx_hash_item * it = rx_hash_store (memo, hash,
(void *)&template,
&se_list_hash_rules);
if (!it)
return 0;
if (it->data == (void *)&template)
{
struct rx_se_list * consed;
consed = (struct rx_se_list *) malloc (sizeof (*consed));
*consed = template;
it->data = (void *)consed;
}
return (struct rx_se_list *)it->data;
}
}
#ifdef __STDC__
static struct rx_se_list *
hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog)
#else
static struct rx_se_list *
hash_se_prog (rx, memo, prog)
struct rx * rx;
struct rx_hash * memo;
struct rx_se_list * prog;
#endif
{
struct rx_se_list * answer = 0;
while (prog)
{
answer = hash_cons_se_prog (rx, memo, prog->car, answer);
if (!answer)
return 0;
prog = prog->cdr;
}
return answer;
}
/* {Constructors, etc. for NFA State Sets}
*/
#ifdef __STDC__
static int
nfa_set_cmp (void * va, void * vb)
#else
static int
nfa_set_cmp (va, vb)
void * va;
void * vb;
#endif
{
struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va;
struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb;
return ((va == vb)
? 0
: (!va
? -1
: (!vb
? 1
: (a->car->id < b->car->id
? 1
: (a->car->id > b->car->id
? -1
: nfa_set_cmp ((void *)a->cdr, (void *)b->cdr))))));
}
#ifdef __STDC__
static int
nfa_set_equal (void * va, void * vb)
#else
static int
nfa_set_equal (va, vb)
void * va;
void * vb;
#endif
{
return !nfa_set_cmp (va, vb);
}
static struct rx_hash_rules nfa_set_hash_rules = { nfa_set_equal, 0, 0, 0, 0 };
#ifdef __STDC__
static struct rx_nfa_state_set *
nfa_set_cons (struct rx * rx,
struct rx_hash * memo, struct rx_nfa_state * state,
struct rx_nfa_state_set * set)
#else
static struct rx_nfa_state_set *
nfa_set_cons (rx, memo, state, set)
struct rx * rx;
struct rx_hash * memo;
struct rx_nfa_state * state;
struct rx_nfa_state_set * set;
#endif
{
struct rx_nfa_state_set template;
struct rx_hash_item * node;
template.car = state;
template.cdr = set;
node = rx_hash_store (memo,
(((long)state) >> 8) ^ (long)set,
&template, &nfa_set_hash_rules);
if (!node)
return 0;
if (node->data == &template)
{
struct rx_nfa_state_set * l;
l = (struct rx_nfa_state_set *) malloc (sizeof (*l));
node->data = (void *) l;
if (!l)
return 0;
*l = template;
}
return (struct rx_nfa_state_set *)node->data;
}
#ifdef __STDC__
static struct rx_nfa_state_set *
nfa_set_enjoin (struct rx * rx,
struct rx_hash * memo, struct rx_nfa_state * state,
struct rx_nfa_state_set * set)
#else
static struct rx_nfa_state_set *
nfa_set_enjoin (rx, memo, state, set)
struct rx * rx;
struct rx_hash * memo;
struct rx_nfa_state * state;
struct rx_nfa_state_set * set;
#endif
{
if (!set || (state->id < set->car->id))
return nfa_set_cons (rx, memo, state, set);
if (state->id == set->car->id)
return set;
else
{
struct rx_nfa_state_set * newcdr
= nfa_set_enjoin (rx, memo, state, set->cdr);
if (newcdr != set->cdr)
set = nfa_set_cons (rx, memo, set->car, newcdr);
return set;
}
}
/* {Computing Epsilon/Side-effect Closures.}
*/
struct eclose_frame
{
struct rx_se_list *prog_backwards;
};
/* This is called while computing closures for "outnode".
* The current node in the traversal is "node".
* "frame" contains a list of a all side effects between
* "outnode" and "node" from most to least recent.
*
* Explores forward from "node", adding new possible
* futures to outnode.
*
* Returns 0 on allocation failure.
*/
#ifdef __STDC__
static int
eclose_node (struct rx *rx, struct rx_nfa_state *outnode,
struct rx_nfa_state *node, struct eclose_frame *frame)
#else
static int
eclose_node (rx, outnode, node, frame)
struct rx *rx;
struct rx_nfa_state *outnode;
struct rx_nfa_state *node;
struct eclose_frame *frame;
#endif
{
struct rx_nfa_edge *e = node->edges;
/* For each node, we follow all epsilon paths to build the closure.
* The closure omits nodes that have only epsilon edges.
* The closure is split into partial closures -- all the states in
* a partial closure are reached by crossing the same list of
* of side effects (though not necessarily the same path).
*/
if (node->mark)
return 1;
node->mark = 1;
/* If "node" has more than just epsilon and
* and side-effect transitions (id >= 0), or is final,
* then it has to be added to the possible futures
* of "outnode".
*/
if (node->id >= 0 || node->is_final)
{
struct rx_possible_future **ec;
struct rx_se_list * prog_in_order;
int cmp;
prog_in_order = ((struct rx_se_list *)hash_se_prog (rx,
&rx->se_list_memo,
frame->prog_backwards));
ec = &outnode->futures;
while (*ec)
{
cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order);
if (cmp <= 0)
break;
ec = &(*ec)->next;
}
if (!*ec || (cmp < 0))
{
struct rx_possible_future * pf;
pf = rx_possible_future (rx, prog_in_order);
if (!pf)
return 0;
pf->next = *ec;
*ec = pf;
}
if (node->id >= 0)
{
(*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo,
node, (*ec)->destset);
if (!(*ec)->destset)
return 0;
}
}
/* Recurse on outgoing epsilon and side effect nodes.
*/
while (e)
{
switch (e->type)
{
case ne_epsilon:
if (!eclose_node (rx, outnode, e->dest, frame))
return 0;
break;
case ne_side_effect:
{
frame->prog_backwards = side_effect_cons (rx,
e->params.side_effect,
frame->prog_backwards);
if (!frame->prog_backwards)
return 0;
if (!eclose_node (rx, outnode, e->dest, frame))
return 0;
{
struct rx_se_list * dying = frame->prog_backwards;
frame->prog_backwards = frame->prog_backwards->cdr;
free ((char *)dying);
}
break;
}
default:
break;
}
e = e->next;
}
node->mark = 0;
return 1;
}
#ifdef __STDC__
struct rx_possible_future *
rx_state_possible_futures (struct rx * rx, struct rx_nfa_state * n)
#else
struct rx_possible_future *
rx_state_possible_futures (rx, n)
struct rx * rx;
struct rx_nfa_state * n;
#endif
{
if (n->futures_computed)
return n->futures;
else
{
struct eclose_frame frame;
frame.prog_backwards = 0;
if (!eclose_node (rx, n, n, &frame))
return 0;
else
{
n->futures_computed = 1;
return n->futures;
}
}
}
/* {Storing the NFA in a Contiguous Region of Memory}
*/
#ifdef __STDC__
static void
se_memo_freer (struct rx_hash_item * node)
#else
static void
se_memo_freer (node)
struct rx_hash_item * node;
#endif
{
free ((char *)node->data);
}
#ifdef __STDC__
static void
nfa_set_freer (struct rx_hash_item * node)
#else
static void
nfa_set_freer (node)
struct rx_hash_item * node;
#endif
{
free ((char *)node->data);
}
#ifdef __STDC__
void
rx_free_nfa (struct rx *rx)
#else
void
rx_free_nfa (rx)
struct rx *rx;
#endif
{
rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules);
rx_bzero ((char *)&rx->se_list_memo, sizeof (rx->se_list_memo));
rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules);
rx_bzero ((char *)&rx->set_list_memo, sizeof (rx->set_list_memo));
rx_free_nfa_graph (rx);
}
syntax highlighted by Code2HTML, v. 0.9.1