#include	"global.h"
#include	"list.h"
#include	"irep.h"
#include	"fsa.h"

int State::nextStateId = 0;

const int Transition::NOCHAR = 0;
int Transition::minChar = 255;
int Transition::maxChar = 1;

// In addition to initialising instance variables, the constructor
// function for Transition also notes smallest and largest character set
// for optimisation purposes.

Transition::Transition(char t, State* dest)
{
	_trigger = t; _destination = dest;
	if (t == NOCHAR)
		return;
	if (t < minChar)
		minChar = t;
	if (t > maxChar)
		maxChar = t;
}

// The findTrigger() member function scans through the list of
// transitions for one with a trigger equal to the target.

Transition *TransitionList::findTrigger(char target)
{
	listIter<Transition *> forEach(this);
	Transition **item;

	while (item = forEach())
		if ((*item)->trigger() == target)
			return(*item);
	return 0;
}

// The following print() functions are merely for debugging purposes.

void State::print(void)
{
	printf("state: %d ", id());
	if (associatedToken()) printf("%s ", associatedToken()->name());
	listIter<Transition *> forEach(&transitions);
	Transition **item;
	while (item = forEach())
		(*item)->print();
	printf("\n");
}

void Transition::print(void)
{
	if (trigger())
		printf("%c -> %d, ", trigger(), destination()->id());
	else
		printf("[E] -> %d, ", destination()->id());
}

fsaState *fsaState::initialState;
fsaStateList fsaState::stateSet;

// The getClosure() member function finds all null transitions and
// invokes itself recursively on those destination states.  It ensures
// that the recursion terminates even when a cycle exist by avoiding a
// transitive operation when a state has been visited.

void fsaState::getClosure(fsaStateList *closure)
{
	listIter<Transition *> forEach(&transitions);
	Transition **tr;

	closure->append(this);
	while (tr = forEach())
		if ((*tr)->trigger() == Transition::NOCHAR) {
			fsaState *s = (fsaState *) (*tr)->destination();
			if (!closure->find(s))
				s->getClosure(closure);
		}
	
}

// A FSA is generated from the rule specifications by invoked the
// process() member function on each rule body corresponding to a
// token.

void fsaState::makeFsa(void)
{
	initialState = new fsaState();
	listIter<LexRule *> forEach(&LexRule::ruleSet);
	LexRule **rule;

	while (rule = forEach()) {
	    if (!(*rule)->hasDefinition()) {
		printf("`%s' has not been defined\n", (*rule)->name());
		continue;
	    }
	    if ((*rule)->isToken() || (*rule)->isIgnored())
		// no fsa if rule does not specify a token or ignored sequence
		if ((*rule)->body()) {
		    // make fsa if token is specified
		    fsaState *final = (*rule)->process(initialState);
		    final->associateWith(*rule);
		} else
		    printf("TOKEN def for `%s' is empty\n", (*rule)->name());
	}
}

dfsaStateList dfsaState::stateSet;
dfsaState *dfsaState::initialState;

// The deterministic FSA is constructed by finding the closure of the
// initial state of the non-deterministic FSA and then recursively
// completing the machine (within the constructor of dfsaState) with
// appropriate transitions.

void dfsaState::makeDfsa()
{
	fsaStateList *newClosure = new fsaStateList();
	fsaState::initialState->getClosure(newClosure);
	initialState = new dfsaState(newClosure);
}

dfsaState::dfsaState(fsaStateList *c) : State()
{
	closure = c;
        // The use of the dummy variable is a workaround since
        // gcc cannot take the following syntax:
        // stateSet.append(this);
        dfsaState *dummy = this; stateSet.append(dummy);
	completeMachine();
}

// The completeMachine() member function is higher recursive.  The
// complete machine is built by finding all successor states for the
// current state, and then completing the machine for the successor
// states until no new states are found.

void dfsaState::completeMachine()
{
    int c;
    fsaStateList *newClosure;

    newClosure = new fsaStateList();
    for (c = Transition::minChar; c <= Transition::maxChar; c++) {
	listIter<fsaState *> forEach(closure);
	fsaState **state;
	while (state = forEach()) {

// For each symbol c and each state in the closure, the closure of the
// destination state is computed.

	    listIter<Transition *> forEach(&(*state)->transitions);
	    Transition **tr;
	    while (tr = forEach())
		if ((*tr)->trigger() == c)
		    ((fsaState *)(*tr)->destination())->getClosure(newClosure);
	}
	if (!newClosure->empty()) {

// An empty closure set implies the absence of transitions, and thus
// need not be considered.  On the other hand, a non-empty closure
// that has existed implies that the state has already been encountered,
// and thus need not be re-considered.

	    dfsaState *dest = stateSet.matchClosure(newClosure);
	    if (dest)
//              dest already exist -- do not bother about building it
		delete newClosure;
	    else {
//              dest does not already exist and thus must be created
		dest = new dfsaState(newClosure);
		State* final = dest->closure->findFinalState();
		if (final)
			dest->associateWith(final->associatedToken());
	    }
	    newClosure = new fsaStateList();
	    transitions.append(new Transition(c, dest));
	}
    }
    delete newClosure;
}

// Two closure sets are equal if they contain each other.  The search
// relies on the iterator function to scan down the list of
// deterministic FSA states.

dfsaState *dfsaStateList::matchClosure(fsaStateList *a)
{
	listIter<dfsaState *> forEach(this);
	dfsaState **item;

	while (item = forEach())
		if ((*item)->closure->length() == a->length() &&
                        // While the length() test is not necessary,
                        // in most cases, it avoids calling contains().
			(*item)->closure->contains(*a) &&
			a->contains(*(*item)->closure))
			return(*item);
	return 0;
}

// The findFinalState() member function maintains the final state
// correspondence between the non-deterministic and deterministic
// states.

fsaState *fsaStateList::findFinalState(void)
{
	listIter<fsaState *> forEach(this);
	fsaState **item;

	while (item = forEach())
		if ((*item)->associatedToken())
			return(*item);
	return 0;
}

odfsaStateList odfsaState::stateSet;
odfsaState *odfsaState::finalPartition;
odfsaState *odfsaState::nonfinalPartition;
odfsaState *odfsaState::initialPartition;

// The SubgroupInfo class allows for partition consistency checks as
// required in optimising a FSA.

class SubgroupInfo {
public:
	dfsaStateList *origStates;
	odfsaState* destPartition;
	LexRule *token;
	SubgroupInfo(odfsaState *d, LexRule *f)
	{
		origStates = new dfsaStateList();
		destPartition = d; token = f;
	}
	~SubgroupInfo()
	{
		if (origStates) {
			delete origStates;
			origStates = 0;
		}
	}
};

class SubgroupInfoList : public list<SubgroupInfo *> {
public:
	void enter(dfsaState *, odfsaState *);
};

// The enter() member function registers a source and destination state
// pair into the SubgroupInfoList.  Checks are subsequently performed by
// the member function checkPartition().

void SubgroupInfoList::enter(dfsaState *src, odfsaState *dest)
{
	listIter<SubgroupInfo *> forEach(this);
	SubgroupInfo **item;
	while (item = forEach())

// Attempt to find a subgroup which includes the destination state.

		if ((*item)->destPartition == dest &&
			(*item)->token == src->associatedToken())
			break;

	SubgroupInfo *p;
	if (item)
	    p = *item;
	else  {
	    p = new SubgroupInfo(dest, src->associatedToken());
	    insert(p);
	}
	p->origStates->append(src);
}

// An FSA is optimised by making a minimal partition and then
// iteratively check the consistency.  Where a partition is not
// consistent, it is further repartitioned.

void odfsaState::optimiseDfsa(void)
{
// We start by making a most optimistic partition of two: one which
// contains final states and the other, non-final states.

    finalPartition = new odfsaState();
    nonfinalPartition = new odfsaState();
    {
	listIter<dfsaState *> forEach(&dfsaState::stateSet);
	dfsaState **p;
	while (p = forEach())
	    if ((*p)->associatedToken())
		finalPartition->constituents->append(*p);
	    else
		nonfinalPartition->constituents->append(*p);
    }

// The process of ensuring that partitions are consistent is iterative
// because a later repartition might make an earlier partitions
// inconsistent.

    bool consistent;
    do {
	listIter<odfsaState *> forEach(&stateSet);
	odfsaState **p;
	consistent = TRUE;
	while (p = forEach())
	    if (!(*p)->checkPartition())
	    	consistent = FALSE;
    } while (!consistent);
    initialPartition = stateSet.matchConstituents(dfsaState::initialState);

// When partition information is confirmed, transition information
// is transfered over from the non-optimised machine.

    {
	listIter<odfsaState *> forEach(&stateSet);
	odfsaState **p;
	while (p = forEach())
	    (*p)->includeTransitions();
    }
}

// Consistency checking of partitions is performed by recording
// destination states for every transition.  If the destination states
// belong to the same partition, it is not invalid.

bool odfsaState::checkPartition(void)
{
    SubgroupInfoList subgroups;

    for (int ch = Transition::minChar; ch <= Transition::maxChar; ch++) {
	listIter<dfsaState *> forEach(constituents);
	dfsaState **s;
	while (s = forEach()) {

// For each constituent state in the partition and each symbol, the
// destination state is recorded.

	    Transition *tr = (*s)->transitions.findTrigger(ch);
	    dfsaState *dest = (tr ? (dfsaState *)tr->destination() : 0);
	    subgroups.enter(*s, stateSet.matchConstituents(dest));
	}
	if (subgroups.length() == 1) {

// The partition has not been found to be inconsistent if the
// destination states belong in the same partition.

		SubgroupInfo *g = subgroups.get();
		associateWith(g->token);
		delete g;
	} else {

// The partition is found to be inconsistent if the destination states
// belong in different partitions.  Here, the partition is further
// divided according to subgroup information.  In addition,
// checkPartition() returns with a FALSE status so that the consistency
// check may be restarted.

		odfsaState *newPartn;
		while (subgroups.length()) {
			SubgroupInfo *g = subgroups.get();
			if (subgroups.empty()) {
				delete constituents;
				constituents = new dfsaStateList();
				newPartn = this;
			} else
				newPartn = new odfsaState();
			newPartn->associateWith(g->token);
			newPartn->constituents = g->origStates;
			g ->origStates = 0;
			delete g;
		}
		return FALSE;
	}
    }
    return TRUE;
}

// The includeTransitions() member function transfers the transition
// information from the deterministic machine to the optimised machine.

void odfsaState::includeTransitions(void)
{
    for (int ch = Transition::minChar; ch <= Transition::maxChar; ch++) {
	listIter<dfsaState *> forEach(constituents);
	dfsaState **s;
	while (s = forEach()) {
	    Transition *tr = (*s)->transitions.findTrigger(ch);
	    if (tr && tr->destination()) {
		dfsaState *_dest = (dfsaState *) tr->destination();
		odfsaState *dest = stateSet.matchConstituents(_dest);
	    	transitions.append(new Transition(ch, dest));
		break;
	    }
	}
    }
}

void odfsaState::print(void)
{
#ifdef	DEBUGPRINT
	printf("<");
	listIter<dfsaState *> forEach(constituents);
	dfsaState **s;
	while (s = forEach()) {
		printf("%d ", (*s)->id());
	}
	printf("> ");
#endif
	State::print();
}

odfsaState *odfsaStateList::matchConstituents(dfsaState *target)
{
	listIter<odfsaState *> forEach(this);
	odfsaState **item;

	while (item = forEach())
		if ((*item)->constituents->find(target))
			return(*item);
	return 0;
}


syntax highlighted by Code2HTML, v. 0.9.1