#include "global.h"
#include "list.h"
#include "irep.h"
#include "fsa.h"
int State::nextStateId = 0;
const int Transition::NOCHAR = 0;
int Transition::minChar = 255;
int Transition::maxChar = 1;
// In addition to initialising instance variables, the constructor
// function for Transition also notes smallest and largest character set
// for optimisation purposes.
Transition::Transition(char t, State* dest)
{
_trigger = t; _destination = dest;
if (t == NOCHAR)
return;
if (t < minChar)
minChar = t;
if (t > maxChar)
maxChar = t;
}
// The findTrigger() member function scans through the list of
// transitions for one with a trigger equal to the target.
Transition *TransitionList::findTrigger(char target)
{
listIter<Transition *> forEach(this);
Transition **item;
while (item = forEach())
if ((*item)->trigger() == target)
return(*item);
return 0;
}
// The following print() functions are merely for debugging purposes.
void State::print(void)
{
printf("state: %d ", id());
if (associatedToken()) printf("%s ", associatedToken()->name());
listIter<Transition *> forEach(&transitions);
Transition **item;
while (item = forEach())
(*item)->print();
printf("\n");
}
void Transition::print(void)
{
if (trigger())
printf("%c -> %d, ", trigger(), destination()->id());
else
printf("[E] -> %d, ", destination()->id());
}
fsaState *fsaState::initialState;
fsaStateList fsaState::stateSet;
// The getClosure() member function finds all null transitions and
// invokes itself recursively on those destination states. It ensures
// that the recursion terminates even when a cycle exist by avoiding a
// transitive operation when a state has been visited.
void fsaState::getClosure(fsaStateList *closure)
{
listIter<Transition *> forEach(&transitions);
Transition **tr;
closure->append(this);
while (tr = forEach())
if ((*tr)->trigger() == Transition::NOCHAR) {
fsaState *s = (fsaState *) (*tr)->destination();
if (!closure->find(s))
s->getClosure(closure);
}
}
// A FSA is generated from the rule specifications by invoked the
// process() member function on each rule body corresponding to a
// token.
void fsaState::makeFsa(void)
{
initialState = new fsaState();
listIter<LexRule *> forEach(&LexRule::ruleSet);
LexRule **rule;
while (rule = forEach()) {
if (!(*rule)->hasDefinition()) {
printf("`%s' has not been defined\n", (*rule)->name());
continue;
}
if ((*rule)->isToken() || (*rule)->isIgnored())
// no fsa if rule does not specify a token or ignored sequence
if ((*rule)->body()) {
// make fsa if token is specified
fsaState *final = (*rule)->process(initialState);
final->associateWith(*rule);
} else
printf("TOKEN def for `%s' is empty\n", (*rule)->name());
}
}
dfsaStateList dfsaState::stateSet;
dfsaState *dfsaState::initialState;
// The deterministic FSA is constructed by finding the closure of the
// initial state of the non-deterministic FSA and then recursively
// completing the machine (within the constructor of dfsaState) with
// appropriate transitions.
void dfsaState::makeDfsa()
{
fsaStateList *newClosure = new fsaStateList();
fsaState::initialState->getClosure(newClosure);
initialState = new dfsaState(newClosure);
}
dfsaState::dfsaState(fsaStateList *c) : State()
{
closure = c;
// The use of the dummy variable is a workaround since
// gcc cannot take the following syntax:
// stateSet.append(this);
dfsaState *dummy = this; stateSet.append(dummy);
completeMachine();
}
// The completeMachine() member function is higher recursive. The
// complete machine is built by finding all successor states for the
// current state, and then completing the machine for the successor
// states until no new states are found.
void dfsaState::completeMachine()
{
int c;
fsaStateList *newClosure;
newClosure = new fsaStateList();
for (c = Transition::minChar; c <= Transition::maxChar; c++) {
listIter<fsaState *> forEach(closure);
fsaState **state;
while (state = forEach()) {
// For each symbol c and each state in the closure, the closure of the
// destination state is computed.
listIter<Transition *> forEach(&(*state)->transitions);
Transition **tr;
while (tr = forEach())
if ((*tr)->trigger() == c)
((fsaState *)(*tr)->destination())->getClosure(newClosure);
}
if (!newClosure->empty()) {
// An empty closure set implies the absence of transitions, and thus
// need not be considered. On the other hand, a non-empty closure
// that has existed implies that the state has already been encountered,
// and thus need not be re-considered.
dfsaState *dest = stateSet.matchClosure(newClosure);
if (dest)
// dest already exist -- do not bother about building it
delete newClosure;
else {
// dest does not already exist and thus must be created
dest = new dfsaState(newClosure);
State* final = dest->closure->findFinalState();
if (final)
dest->associateWith(final->associatedToken());
}
newClosure = new fsaStateList();
transitions.append(new Transition(c, dest));
}
}
delete newClosure;
}
// Two closure sets are equal if they contain each other. The search
// relies on the iterator function to scan down the list of
// deterministic FSA states.
dfsaState *dfsaStateList::matchClosure(fsaStateList *a)
{
listIter<dfsaState *> forEach(this);
dfsaState **item;
while (item = forEach())
if ((*item)->closure->length() == a->length() &&
// While the length() test is not necessary,
// in most cases, it avoids calling contains().
(*item)->closure->contains(*a) &&
a->contains(*(*item)->closure))
return(*item);
return 0;
}
// The findFinalState() member function maintains the final state
// correspondence between the non-deterministic and deterministic
// states.
fsaState *fsaStateList::findFinalState(void)
{
listIter<fsaState *> forEach(this);
fsaState **item;
while (item = forEach())
if ((*item)->associatedToken())
return(*item);
return 0;
}
odfsaStateList odfsaState::stateSet;
odfsaState *odfsaState::finalPartition;
odfsaState *odfsaState::nonfinalPartition;
odfsaState *odfsaState::initialPartition;
// The SubgroupInfo class allows for partition consistency checks as
// required in optimising a FSA.
class SubgroupInfo {
public:
dfsaStateList *origStates;
odfsaState* destPartition;
LexRule *token;
SubgroupInfo(odfsaState *d, LexRule *f)
{
origStates = new dfsaStateList();
destPartition = d; token = f;
}
~SubgroupInfo()
{
if (origStates) {
delete origStates;
origStates = 0;
}
}
};
class SubgroupInfoList : public list<SubgroupInfo *> {
public:
void enter(dfsaState *, odfsaState *);
};
// The enter() member function registers a source and destination state
// pair into the SubgroupInfoList. Checks are subsequently performed by
// the member function checkPartition().
void SubgroupInfoList::enter(dfsaState *src, odfsaState *dest)
{
listIter<SubgroupInfo *> forEach(this);
SubgroupInfo **item;
while (item = forEach())
// Attempt to find a subgroup which includes the destination state.
if ((*item)->destPartition == dest &&
(*item)->token == src->associatedToken())
break;
SubgroupInfo *p;
if (item)
p = *item;
else {
p = new SubgroupInfo(dest, src->associatedToken());
insert(p);
}
p->origStates->append(src);
}
// An FSA is optimised by making a minimal partition and then
// iteratively check the consistency. Where a partition is not
// consistent, it is further repartitioned.
void odfsaState::optimiseDfsa(void)
{
// We start by making a most optimistic partition of two: one which
// contains final states and the other, non-final states.
finalPartition = new odfsaState();
nonfinalPartition = new odfsaState();
{
listIter<dfsaState *> forEach(&dfsaState::stateSet);
dfsaState **p;
while (p = forEach())
if ((*p)->associatedToken())
finalPartition->constituents->append(*p);
else
nonfinalPartition->constituents->append(*p);
}
// The process of ensuring that partitions are consistent is iterative
// because a later repartition might make an earlier partitions
// inconsistent.
bool consistent;
do {
listIter<odfsaState *> forEach(&stateSet);
odfsaState **p;
consistent = TRUE;
while (p = forEach())
if (!(*p)->checkPartition())
consistent = FALSE;
} while (!consistent);
initialPartition = stateSet.matchConstituents(dfsaState::initialState);
// When partition information is confirmed, transition information
// is transfered over from the non-optimised machine.
{
listIter<odfsaState *> forEach(&stateSet);
odfsaState **p;
while (p = forEach())
(*p)->includeTransitions();
}
}
// Consistency checking of partitions is performed by recording
// destination states for every transition. If the destination states
// belong to the same partition, it is not invalid.
bool odfsaState::checkPartition(void)
{
SubgroupInfoList subgroups;
for (int ch = Transition::minChar; ch <= Transition::maxChar; ch++) {
listIter<dfsaState *> forEach(constituents);
dfsaState **s;
while (s = forEach()) {
// For each constituent state in the partition and each symbol, the
// destination state is recorded.
Transition *tr = (*s)->transitions.findTrigger(ch);
dfsaState *dest = (tr ? (dfsaState *)tr->destination() : 0);
subgroups.enter(*s, stateSet.matchConstituents(dest));
}
if (subgroups.length() == 1) {
// The partition has not been found to be inconsistent if the
// destination states belong in the same partition.
SubgroupInfo *g = subgroups.get();
associateWith(g->token);
delete g;
} else {
// The partition is found to be inconsistent if the destination states
// belong in different partitions. Here, the partition is further
// divided according to subgroup information. In addition,
// checkPartition() returns with a FALSE status so that the consistency
// check may be restarted.
odfsaState *newPartn;
while (subgroups.length()) {
SubgroupInfo *g = subgroups.get();
if (subgroups.empty()) {
delete constituents;
constituents = new dfsaStateList();
newPartn = this;
} else
newPartn = new odfsaState();
newPartn->associateWith(g->token);
newPartn->constituents = g->origStates;
g ->origStates = 0;
delete g;
}
return FALSE;
}
}
return TRUE;
}
// The includeTransitions() member function transfers the transition
// information from the deterministic machine to the optimised machine.
void odfsaState::includeTransitions(void)
{
for (int ch = Transition::minChar; ch <= Transition::maxChar; ch++) {
listIter<dfsaState *> forEach(constituents);
dfsaState **s;
while (s = forEach()) {
Transition *tr = (*s)->transitions.findTrigger(ch);
if (tr && tr->destination()) {
dfsaState *_dest = (dfsaState *) tr->destination();
odfsaState *dest = stateSet.matchConstituents(_dest);
transitions.append(new Transition(ch, dest));
break;
}
}
}
}
void odfsaState::print(void)
{
#ifdef DEBUGPRINT
printf("<");
listIter<dfsaState *> forEach(constituents);
dfsaState **s;
while (s = forEach()) {
printf("%d ", (*s)->id());
}
printf("> ");
#endif
State::print();
}
odfsaState *odfsaStateList::matchConstituents(dfsaState *target)
{
listIter<odfsaState *> forEach(this);
odfsaState **item;
while (item = forEach())
if ((*item)->constituents->find(target))
return(*item);
return 0;
}
syntax highlighted by Code2HTML, v. 0.9.1