#include "global.h" #include "list.h" #include "irep.h" #include "fsa.h" int State::nextStateId = 0; const int Transition::NOCHAR = 0; int Transition::minChar = 255; int Transition::maxChar = 1; // In addition to initialising instance variables, the constructor // function for Transition also notes smallest and largest character set // for optimisation purposes. Transition::Transition(char t, State* dest) { _trigger = t; _destination = dest; if (t == NOCHAR) return; if (t < minChar) minChar = t; if (t > maxChar) maxChar = t; } // The findTrigger() member function scans through the list of // transitions for one with a trigger equal to the target. Transition *TransitionList::findTrigger(char target) { listIter forEach(this); Transition **item; while (item = forEach()) if ((*item)->trigger() == target) return(*item); return 0; } // The following print() functions are merely for debugging purposes. void State::print(void) { printf("state: %d ", id()); if (associatedToken()) printf("%s ", associatedToken()->name()); listIter forEach(&transitions); Transition **item; while (item = forEach()) (*item)->print(); printf("\n"); } void Transition::print(void) { if (trigger()) printf("%c -> %d, ", trigger(), destination()->id()); else printf("[E] -> %d, ", destination()->id()); } fsaState *fsaState::initialState; fsaStateList fsaState::stateSet; // The getClosure() member function finds all null transitions and // invokes itself recursively on those destination states. It ensures // that the recursion terminates even when a cycle exist by avoiding a // transitive operation when a state has been visited. void fsaState::getClosure(fsaStateList *closure) { listIter forEach(&transitions); Transition **tr; closure->append(this); while (tr = forEach()) if ((*tr)->trigger() == Transition::NOCHAR) { fsaState *s = (fsaState *) (*tr)->destination(); if (!closure->find(s)) s->getClosure(closure); } } // A FSA is generated from the rule specifications by invoked the // process() member function on each rule body corresponding to a // token. void fsaState::makeFsa(void) { initialState = new fsaState(); listIter forEach(&LexRule::ruleSet); LexRule **rule; while (rule = forEach()) { if (!(*rule)->hasDefinition()) { printf("`%s' has not been defined\n", (*rule)->name()); continue; } if ((*rule)->isToken() || (*rule)->isIgnored()) // no fsa if rule does not specify a token or ignored sequence if ((*rule)->body()) { // make fsa if token is specified fsaState *final = (*rule)->process(initialState); final->associateWith(*rule); } else printf("TOKEN def for `%s' is empty\n", (*rule)->name()); } } dfsaStateList dfsaState::stateSet; dfsaState *dfsaState::initialState; // The deterministic FSA is constructed by finding the closure of the // initial state of the non-deterministic FSA and then recursively // completing the machine (within the constructor of dfsaState) with // appropriate transitions. void dfsaState::makeDfsa() { fsaStateList *newClosure = new fsaStateList(); fsaState::initialState->getClosure(newClosure); initialState = new dfsaState(newClosure); } dfsaState::dfsaState(fsaStateList *c) : State() { closure = c; // The use of the dummy variable is a workaround since // gcc cannot take the following syntax: // stateSet.append(this); dfsaState *dummy = this; stateSet.append(dummy); completeMachine(); } // The completeMachine() member function is higher recursive. The // complete machine is built by finding all successor states for the // current state, and then completing the machine for the successor // states until no new states are found. void dfsaState::completeMachine() { int c; fsaStateList *newClosure; newClosure = new fsaStateList(); for (c = Transition::minChar; c <= Transition::maxChar; c++) { listIter forEach(closure); fsaState **state; while (state = forEach()) { // For each symbol c and each state in the closure, the closure of the // destination state is computed. listIter forEach(&(*state)->transitions); Transition **tr; while (tr = forEach()) if ((*tr)->trigger() == c) ((fsaState *)(*tr)->destination())->getClosure(newClosure); } if (!newClosure->empty()) { // An empty closure set implies the absence of transitions, and thus // need not be considered. On the other hand, a non-empty closure // that has existed implies that the state has already been encountered, // and thus need not be re-considered. dfsaState *dest = stateSet.matchClosure(newClosure); if (dest) // dest already exist -- do not bother about building it delete newClosure; else { // dest does not already exist and thus must be created dest = new dfsaState(newClosure); State* final = dest->closure->findFinalState(); if (final) dest->associateWith(final->associatedToken()); } newClosure = new fsaStateList(); transitions.append(new Transition(c, dest)); } } delete newClosure; } // Two closure sets are equal if they contain each other. The search // relies on the iterator function to scan down the list of // deterministic FSA states. dfsaState *dfsaStateList::matchClosure(fsaStateList *a) { listIter forEach(this); dfsaState **item; while (item = forEach()) if ((*item)->closure->length() == a->length() && // While the length() test is not necessary, // in most cases, it avoids calling contains(). (*item)->closure->contains(*a) && a->contains(*(*item)->closure)) return(*item); return 0; } // The findFinalState() member function maintains the final state // correspondence between the non-deterministic and deterministic // states. fsaState *fsaStateList::findFinalState(void) { listIter forEach(this); fsaState **item; while (item = forEach()) if ((*item)->associatedToken()) return(*item); return 0; } odfsaStateList odfsaState::stateSet; odfsaState *odfsaState::finalPartition; odfsaState *odfsaState::nonfinalPartition; odfsaState *odfsaState::initialPartition; // The SubgroupInfo class allows for partition consistency checks as // required in optimising a FSA. class SubgroupInfo { public: dfsaStateList *origStates; odfsaState* destPartition; LexRule *token; SubgroupInfo(odfsaState *d, LexRule *f) { origStates = new dfsaStateList(); destPartition = d; token = f; } ~SubgroupInfo() { if (origStates) { delete origStates; origStates = 0; } } }; class SubgroupInfoList : public list { public: void enter(dfsaState *, odfsaState *); }; // The enter() member function registers a source and destination state // pair into the SubgroupInfoList. Checks are subsequently performed by // the member function checkPartition(). void SubgroupInfoList::enter(dfsaState *src, odfsaState *dest) { listIter forEach(this); SubgroupInfo **item; while (item = forEach()) // Attempt to find a subgroup which includes the destination state. if ((*item)->destPartition == dest && (*item)->token == src->associatedToken()) break; SubgroupInfo *p; if (item) p = *item; else { p = new SubgroupInfo(dest, src->associatedToken()); insert(p); } p->origStates->append(src); } // An FSA is optimised by making a minimal partition and then // iteratively check the consistency. Where a partition is not // consistent, it is further repartitioned. void odfsaState::optimiseDfsa(void) { // We start by making a most optimistic partition of two: one which // contains final states and the other, non-final states. finalPartition = new odfsaState(); nonfinalPartition = new odfsaState(); { listIter forEach(&dfsaState::stateSet); dfsaState **p; while (p = forEach()) if ((*p)->associatedToken()) finalPartition->constituents->append(*p); else nonfinalPartition->constituents->append(*p); } // The process of ensuring that partitions are consistent is iterative // because a later repartition might make an earlier partitions // inconsistent. bool consistent; do { listIter forEach(&stateSet); odfsaState **p; consistent = TRUE; while (p = forEach()) if (!(*p)->checkPartition()) consistent = FALSE; } while (!consistent); initialPartition = stateSet.matchConstituents(dfsaState::initialState); // When partition information is confirmed, transition information // is transfered over from the non-optimised machine. { listIter forEach(&stateSet); odfsaState **p; while (p = forEach()) (*p)->includeTransitions(); } } // Consistency checking of partitions is performed by recording // destination states for every transition. If the destination states // belong to the same partition, it is not invalid. bool odfsaState::checkPartition(void) { SubgroupInfoList subgroups; for (int ch = Transition::minChar; ch <= Transition::maxChar; ch++) { listIter forEach(constituents); dfsaState **s; while (s = forEach()) { // For each constituent state in the partition and each symbol, the // destination state is recorded. Transition *tr = (*s)->transitions.findTrigger(ch); dfsaState *dest = (tr ? (dfsaState *)tr->destination() : 0); subgroups.enter(*s, stateSet.matchConstituents(dest)); } if (subgroups.length() == 1) { // The partition has not been found to be inconsistent if the // destination states belong in the same partition. SubgroupInfo *g = subgroups.get(); associateWith(g->token); delete g; } else { // The partition is found to be inconsistent if the destination states // belong in different partitions. Here, the partition is further // divided according to subgroup information. In addition, // checkPartition() returns with a FALSE status so that the consistency // check may be restarted. odfsaState *newPartn; while (subgroups.length()) { SubgroupInfo *g = subgroups.get(); if (subgroups.empty()) { delete constituents; constituents = new dfsaStateList(); newPartn = this; } else newPartn = new odfsaState(); newPartn->associateWith(g->token); newPartn->constituents = g->origStates; g ->origStates = 0; delete g; } return FALSE; } } return TRUE; } // The includeTransitions() member function transfers the transition // information from the deterministic machine to the optimised machine. void odfsaState::includeTransitions(void) { for (int ch = Transition::minChar; ch <= Transition::maxChar; ch++) { listIter forEach(constituents); dfsaState **s; while (s = forEach()) { Transition *tr = (*s)->transitions.findTrigger(ch); if (tr && tr->destination()) { dfsaState *_dest = (dfsaState *) tr->destination(); odfsaState *dest = stateSet.matchConstituents(_dest); transitions.append(new Transition(ch, dest)); break; } } } } void odfsaState::print(void) { #ifdef DEBUGPRINT printf("<"); listIter forEach(constituents); dfsaState **s; while (s = forEach()) { printf("%d ", (*s)->id()); } printf("> "); #endif State::print(); } odfsaState *odfsaStateList::matchConstituents(dfsaState *target) { listIter forEach(this); odfsaState **item; while (item = forEach()) if ((*item)->constituents->find(target)) return(*item); return 0; }