/* Web Polygraph http://www.web-polygraph.org/
* (C) 2003-2006 The Measurement Factory
* Licensed under the Apache License, Version 2.0 */
#include "xstd/xstd.h"
#include "xstd/h/iomanip.h"
#include "xstd/String.h"
#include "xstd/StrIdentifier.h"
/* StrIdentifier */
StrIdentifier::StrIdentifier(): theCount(0), theLastId(0), isLocked(false) {
}
StrIdentifier::~StrIdentifier() {
while (theStrings.count()) delete theStrings.pop();
}
int StrIdentifier::add(const String &str) {
return doAdd(str, ++theLastId);
}
int StrIdentifier::add(const String &str, int id) {
Assert(theLastId != id); // weak check for uniqueness
return doAdd(str, theLastId = id);
}
int StrIdentifier::doAdd(const String &str, int id) {
Assert(!isLocked); // no additions if optimize()-d
String *copy = new String(str);
theHead.add(copy, id, -1);
theStrings.put(copy, id);
theCount++;
return id;
}
StrIdentifierIter StrIdentifier::iterator() const {
return StrIdentifierIter(*this);
}
int StrIdentifier::lookup(const String &str) const {
return lookup(str.data(), str.len());
}
int StrIdentifier::lookup(const char *buf, int len) const {
return theHead.lookup(buf, len);
}
// collapses single-branch paths
void StrIdentifier::optimize() {
// new entries cannot be added from now on
// note: we can support run-time additions, but Node::add
// must not assume sequntial lookup positions
isLocked = true;
theHead.optimize();
}
void StrIdentifier::report(ostream &os) const {
theHead.report(os, 0);
}
/* StrIdentifierIter */
StrIdentifierIter::StrIdentifierIter(const StrIdentifier &anIdfr):
theIdfr(anIdfr), thePos(0) {
sync();
}
StrIdentifierIter &StrIdentifierIter::operator ++() {
if (!atEnd()) {
++thePos;
sync();
}
return *this;
}
void StrIdentifierIter::sync() {
while (thePos < theIdfr.theStrings.count() && !theIdfr.theStrings[thePos]) {
++thePos;
}
}
bool StrIdentifierIter::atEnd() const {
return thePos >= theIdfr.theStrings.count();
}
const String &StrIdentifierIter::str() const {
Assert(!atEnd());
return *theIdfr.theStrings[thePos];
}
int StrIdentifierIter::id() const {
Assert(!atEnd());
return thePos; // same as id in this StrIdentifier implementation
}
/* StrIdNode */
StrIdNode::StrIdNode(): theId(0) {
u.theTab = 0;
u.theStr = 0;
}
void StrIdNode::add(String *str, int id, int myPos) {
if (!theId) { // virgin node
Assert(id > 0 && str);
theId = id;
u.theStr = str;
} else
if (theId > 0) { // conflict, create next level table
String *s = u.theStr;
u.theTab = new StrIdTable(myPos+1);
u.theTab->add(s, theId);
u.theTab->add(str, id);
theId = -1;
} else { // intermediate node, move further
u.theTab->add(str, id);
}
}
int StrIdNode::lookup(const char *buf, int len) const {
if (!theId)
return 0;
else
if (theId > 0)
return u.theStr->casePrefixOf(buf, len) ? theId : 0;
else
return u.theTab->lookup(buf, len);
}
void StrIdNode::optimize() {
// shrink outgoing branch link by link until hit a fan-out
StrIdNode n;
while (theId < 0 && u.theTab->single(n)) {
delete u.theTab;
*this = n;
}
// recurse if needed
if (theId < 0)
u.theTab->optimize();
}
void StrIdNode::report(ostream &os, int myLevel) const {
if (!theId)
os << "(0) <none>" << endl;
else
if (theId > 0)
os << '(' << theId << ") " << *u.theStr << endl;
else
if (theId < 0)
u.theTab->report(os, myLevel+1);
}
/* StrIdTable */
StrIdTable::StrIdTable(int aPos): theLookupPos(aPos) {
Assert(0 <= theLookupPos && theLookupPos <= 255);
}
void StrIdTable::add(String *str, int id) {
const int idx = lookupIdx(*str);
if (idx >= theNodes.capacity()) { // out of bounds
theNodes.stretch(idx+1);
theNodes.count(theNodes.capacity());
}
theNodes[idx].add(str, id, theLookupPos);
}
int StrIdTable::lookup(const char *buf, int len) const {
const int idx = lookupIdx(buf, len);
if (idx < 0) // buffer is too short already
return 0;
else
if (idx >= theNodes.count()) // lookup char is out of bounds
return 0;
else
return theNodes[idx].lookup(buf, len);
}
void StrIdTable::optimize() {
for (int i = 0; i < theNodes.count(); ++i) {
if (!theNodes[i].dead())
theNodes[i].optimize();
}
}
bool StrIdTable::single(StrIdNode &n) {
// to optimize we could've keep track on the number of live nodes
// but it is combersome because of node conflicts/propogation
int idx = -1;
for (int i = 0; i < theNodes.count(); ++i) {
if (!theNodes[i].dead()) {
if (idx >= 0)
return false; // more than one found
idx = i;
}
}
if (idx < 0) // none found
return false;
n = theNodes[idx];
return true;
}
void StrIdTable::report(ostream &os, int myLevel) const {
os << endl;
for (int i = 0; i < theNodes.count(); ++i) {
if (!theNodes[i].dead()) {
os << setw(2*myLevel) << " ";
if (i > 32)
os << (char)i;
else
os << '<' << i << '>';
os << ": ";
theNodes[i].report(os, myLevel);
}
}
}
syntax highlighted by Code2HTML, v. 0.9.1