/* Web Polygraph http://www.web-polygraph.org/ * (C) 2003-2006 The Measurement Factory * Licensed under the Apache License, Version 2.0 */ #include "pgl/pgl.h" #include #include "xstd/RegEx.h" #include "xstd/Size.h" #include "xstd/gadgets.h" #include "base/opts.h" #include "base/polyOpts.h" #include "base/CmdLine.h" #include "base/RndPermut.h" #include "pgl/PglPp.h" #include "pgl/PglCtx.h" #include "pgl/PglStaticSemx.h" #include "pgl/PglContainerSym.h" #include "pgl/PglStringSym.h" #include "pgl/PglRegExSym.h" #include "pgl/AgentArrIter.h" #include "pgl/AclSym.h" #include "pgl/RobotSym.h" class MyOpts: public OptGrp { public: MyOpts(): theHelpOpt(this, "help", "list of options"), theVersOpt(this, "version", "package version info"), theCfgName(this, "config ", "PGL configuration"), theCfgDirs(this, "cfg_dirs ", "directories for PGL #includes"), theGlbRngSeed(this, "global_rng_seed ","per-test r.n.g. seed", 1) {} virtual bool validate() const; public: HelpOpt theHelpOpt; VersionOpt theVersOpt; StrOpt theCfgName; StrArrOpt theCfgDirs; IntOpt theGlbRngSeed; }; class Rules; class RuleCondnItem { public: RuleCondnItem(RegEx *anExpr = 0, RegExExpr::Oper anOper = RegExExpr::opNone): oper(anOper), expr(anExpr) {} bool operator ==(const RuleCondnItem &i) const; bool operator !=(const RuleCondnItem &i) const { return !(*this == i); } int cmp(const RuleCondnItem &i) const; RuleCondnItem negate() const; ostream &print(ostream &os) const; public: RegExExpr::Oper oper; RegEx *expr; }; class RuleCondn; class RuleCondnHash { public: RuleCondnHash(const RuleCondn &aBase, int expCount = 0); int expCount() const { return theExpCount; } int capacity() const { return theIds.capacity(); } int idxOf(const RuleCondnItem &i) const; // returns -1 if none void add(const RuleCondnItem &i, int idx); protected: int hash(const RuleCondnItem &i) const; protected: const RuleCondn &theBase; Array theIds; int theExpCount; int theCount; }; // array of possibly negated RegExes class RuleCondn { public: RuleCondn(int aCapacity = 0); int count() const { return theItems.count(); } bool has(const RuleCondnItem &h, int offset = 0) const; const RuleCondnItem &item(int idx) const { return theItems.item(idx); } bool operator ==(const RuleCondn &condn) const; bool operator !=(const RuleCondn &condn) const { return !(*this == condn); } int cmp(const RuleCondn &c) const; bool possible() const; bool covers(const RuleCondn &c) const; void add(const RuleCondnItem &i); void sort(); void prune(); void merge(const RuleCondn &c); void simplify(Rules &rules, int offset); ostream &print(ostream &os) const; protected: void createHash(int expCount = 8); void reHash(); private: Array theItems; RuleCondnHash *theHash; }; // condition -> action class AclRule { public: enum Action { rlUnknown, rlAllow, rlDeny, rlRewrite, rlAny }; public: AclRule(Action anAction, RuleCondn *aCondn); Action action() const { return theAction; } const RuleCondn &condn() const { return *theCondn; } bool operator ==(const AclRule &rule) const; bool operator !=(const AclRule &rule) const { return !(*this == rule); } int cmp(const AclRule &r) const; bool covers(const AclRule &r) const; void sort(); void prune(); void simplify(Rules &rules, int offset); ostream &print(ostream &os) const; protected: Action theAction; RuleCondn *theCondn; }; // array of rules class Rules: public Array { public: Rules(): Array(16) {} ~Rules() { while (count()) delete pop(); } const AclRule *covered(const AclRule &r, int excIdx = -1) const; //const AclRule *haveOtherAction(const AclRule &rule) const; //const AclRule *haveAnyAction(const AclRule &rule) const; const AclRule *commit(const AclRule &rule); void sort(); void prune(); void simplify(); ostream &print(ostream &os) const; }; class Normalizer { public: Normalizer(AclRule::Action anAction, const RegExExpr &expr, Rules &rules); void run(); protected: void doNoneOp(); void doNotOp(); void doAndOp(); void doOrOp(); protected: AclRule::Action theAction; const RegExExpr &theExpr; Rules &theRules; }; static MyOpts TheOpts; template inline void safeEject(A &a, int idx) { if (idx+1 < a.count()) a.memmove(idx, &a.item(idx+1), a.count()-(idx+1)); a.pop(); } /* MyOpt */ bool MyOpts::validate() const { if (!theCfgName) cerr << "must specify PGL configuration file (--config)" << endl; else return true; return false; } /* RuleCondnItem */ bool RuleCondnItem::operator ==(const RuleCondnItem &item) const { return oper == item.oper && expr->image() == item.expr->image(); } int RuleCondnItem::cmp(const RuleCondnItem &i) const { if (const int operDiff = oper - i.oper) return operDiff; return expr->image().cmp(i.expr->image()); } RuleCondnItem RuleCondnItem::negate() const { const RegExExpr::Oper o = oper == RegExExpr::opNone ? RegExExpr::opNot : RegExExpr::opNone; return RuleCondnItem(expr, o); } ostream &RuleCondnItem::print(ostream &os) const { if (oper == RegExExpr::opNot) os << "not "; const char *scope = 0; if (expr->image().cmp("user_group=", 11) == 0) scope = "group"; else if (expr->image().cmp("url=", 4) == 0) scope = "url"; else if (expr->image().cmp("url_host=", 9) == 0) scope = "server-name"; else if (expr->image().cmp("url_host_ip=", 12) == 0) scope = "server-ip"; else if (expr->image().cmp("url_path=", 9) == 0) scope = "url"; os << scope << ' '; if ((expr->flags() & RegEx::reExact) == 0) os << "matches "; os << '"' << expr->pattern() << '"'; return os; } /* RuleCondnHash */ RuleCondnHash::RuleCondnHash(const RuleCondn &aBase, int expCount): theBase(aBase), theIds(expCount*3 + 1), theExpCount(expCount), theCount(0) { theIds.count(theIds.capacity()); theIds.memset(0); } int RuleCondnHash::idxOf(const RuleCondnItem &i) const { const int pos = hash(i); for (int p = pos, c = 0; theIds[p]; ++p, p %= theIds.count(), ++c) { const int idx = theIds[p] - 1; if (theBase.item(idx) == i) return idx; Assert(c < theIds.count()); } return -1; } void RuleCondnHash::add(const RuleCondnItem &i, int idx) { Assert(theCount < theIds.count()); Assert(idx >= 0); int p = hash(i); for (; theIds[p]; ++p, p %= theIds.count()) { const int idx = theIds[p] - 1; if (theBase.item(idx) == i) return; // do not hash duplicates? } theIds[p] = idx + 1; theCount++; } inline int StrHash(const String &s) { const int step = Max(1, s.len() / 16); unsigned int res = 0; for (int i = 0; i < s.len(); i += step) res = res*33U + s.data()[i]; return Max(1, abs((int)res)); } int RuleCondnHash::hash(const RuleCondnItem &i) const { // XXX: replace StrHash with String::hash return abs(i.oper ^ StrHash(i.expr->image())) % theIds.count(); } /* RuleCondn */ RuleCondn::RuleCondn(int aCapacity): theItems(aCapacity), theHash(0) { if (aCapacity > 1) createHash(aCapacity); } bool RuleCondn::operator ==(const RuleCondn &c) const { if (count() != c.count()) return false; for (int i = 0; i < c.count(); ++i) { if (!has(c.item(i))) return false; } return true; } bool RuleCondn::has(const RuleCondnItem &h, int offset) const { if (theHash) return theHash->idxOf(h) >= offset; for (int i = offset; i < count(); ++i) { if (item(i) == h) return true; } return false; } bool RuleCondn::possible() const { for (int i = 0; i < count(); ++i) { if (has(item(i).negate(), i+1)) return false; } return true; } bool RuleCondn::covers(const RuleCondn &c) const { for (int i = 0; i < count(); ++i) { if (!c.has(item(i))) return false; } return true; } int RuleCondn::cmp(const RuleCondn &c) const { if (int cntDiff = count() - c.count()) return cntDiff; Assert(count() == c.count()); for (int i = 0; i < count(); ++i) { if (int itemDiff = item(i).cmp(c.item(i))) return itemDiff; } return 0; } static int cmpRuleCondnItems(const void *ip1, const void *ip2) { const RuleCondnItem &i1 = *(const RuleCondnItem*)ip1; const RuleCondnItem &i2 = *(const RuleCondnItem*)ip2; return (i1.cmp(i2)); } void RuleCondn::sort() { delete theHash; theHash = 0; qsort(theItems.items(), count(), sizeof(*theItems.items()), &cmpRuleCondnItems); createHash(count()); } void RuleCondn::prune() { Array prunedItems; // remove duplicates for (int i = 0; i < count(); ++i) { if (!has(item(i), i+1)) prunedItems.append(item(i)); } if (prunedItems.count() != count()) { theItems = prunedItems; reHash(); } } void RuleCondn::merge(const RuleCondn &c) { theItems.stretch(count() + c.count()); for (int i = 0; i < c.count(); ++i) if (!has(c.item(i))) add(c.item(i)); } void RuleCondn::simplify(Rules &rules, int offset) { Array prunedItems; for (int i = 0; i < count(); ++i) { const RuleCondnItem &ci = item(i); const RuleCondnItem notCi = ci.negate(); bool ejectCandidate = true; for (int r = offset; ejectCandidate && r < rules.count(); ++r) { ejectCandidate = rules[r]->condn().has(ci) || !rules[r]->condn().has(notCi); } if (!ejectCandidate) prunedItems.append(ci); } if (prunedItems.count() != count()) { theItems = prunedItems; reHash(); } } ostream &RuleCondn::print(ostream &os) const { if (!count()) return os << "any"; for (int i = 0; i < count(); ++i) { if (i) os << " and "; item(i).print(os); } return os; } void RuleCondn::add(const RuleCondnItem &i) { if (count() && !theHash) createHash(); if (theHash && count() > 0.75*theHash->capacity()) reHash(); if (theHash) theHash->add(i, theItems.count()); theItems.append(i); } void RuleCondn::createHash(int expCount) { if (theHash && expCount <= theHash->expCount()) return; if (theHash) delete theHash; theHash = new RuleCondnHash(*this, expCount); for (int i = 0; i < count(); ++i) theHash->add(item(i), i); } void RuleCondn::reHash() { delete theHash; theHash = 0; createHash(count()); } /* AclRule */ AclRule::AclRule(Action anAction, RuleCondn *aCondn): theAction(anAction), theCondn(aCondn) { } bool AclRule::operator ==(const AclRule &rule) const { return theAction == rule.theAction && (*theCondn) == (*rule.theCondn); } bool AclRule::covers(const AclRule &r) const { return action() == r.action() && theCondn->covers(*r.theCondn); } int AclRule::cmp(const AclRule &r) const { if (const int cndDiff = theCondn->cmp(*r.theCondn)) return cndDiff; return action() - r.action(); } void AclRule::sort() { theCondn->sort(); } void AclRule::prune() { theCondn->prune(); } void AclRule::simplify(Rules &rules, int offset) { theCondn->simplify(rules, offset); } ostream &AclRule::print(ostream &os) const { if (theAction == rlAllow) os << "allow "; else if (theAction == rlDeny) os << "deny "; else if (theAction == rlRewrite) os << "rewrite "; else Assert(false); Assert(theCondn); theCondn->print(os); return os << endl; } /* Rules */ #if FUTURE_CODE // checks both for (A,a) and (A,any) const AclRule *Rules::cover(const AclRule &rule) const { for (int i = 0; i < count(); ++i) { if (item(i)->action() != rule.action()) continue; if (item(i).body() == AclRule::AnyCondn) return &item(i); if (rule.body() == item(i)::body()) return &item(i); } return 0; } // checks for both (!A,a) and (!A,any) const AclRule *Rules::coverOtherAction(const AclRule &rule) const { for (int i = 0; i < count(); ++i) { if (item(i).action() == rule.action()) continue; if (item(i).body() == AclRule::AnyCondn) return &item(i); if (rule.body() == item(i)::body()) return &item(i); } return 0; } // checks for (*,a) const AclRule *Rules::coverAnyAction(const AclRule &rule) const { for (int i = 0; i < count(); ++i) { if (rule.body() == item(i)::body()) return &item(i); } return 0; } const AclRule *Rules::commit(const AclRule &rule) { append(rule.clone()); return last(); } #endif const AclRule *Rules::covered(const AclRule &r, int excIdx) const { for (int i = 0; i < count(); ++i) { if (i == excIdx) continue; if (item(i)->covers(r)) return item(i); } return 0; } void Rules::prune() { // prune individual rules {for (int i = 0; i < count(); ++i) item(i)->prune(); } // remove duplicates and rules with impossible conditions // also remove rules that are covered by more general rules {for (int i = 0; i < count();) { const AclRule &r = *item(i); bool bad = !r.condn().possible() || covered(*item(i), i); for (int goodIdx = 0; !bad && goodIdx < i; ++goodIdx) { bad = *item(goodIdx) == r; } if (bad) safeEject(*this, i); else ++i; }} } static int cmpRules(const void *rp1, const void *rp2) { const AclRule *r1 = *(const AclRule**)rp1; const AclRule *r2 = *(const AclRule**)rp2; return (r1->cmp(*r2)); } void Rules::sort() { // sort individual rules for (int i = 0; i < count(); ++i) item(i)->sort(); qsort(theItems, count(), sizeof(*theItems), &cmpRules); } void Rules::simplify() { for (int i = 0; i < count(); ++i) item(i)->simplify(*this, i+1); } ostream &Rules::print(ostream &os) const { for (int i = 0; i < count(); ++i) { //cout << setw(4) << setfill('0') << i << setfill(' ') << ' '; item(i)->print(cout); } return os; } /* Normalizer */ Normalizer::Normalizer(AclRule::Action anAction, const RegExExpr &anExpr, Rules &aRules): theAction(anAction), theExpr(anExpr), theRules(aRules) { } void Normalizer::doNoneOp() { RuleCondn *condn = new RuleCondn(1); condn->add(RuleCondnItem(theExpr.theVal)); AclRule *rule = new AclRule(theAction, condn); theRules.append(rule); } void Normalizer::doNotOp() { RegExExpr *e = theExpr.theLhs; Assert(e); switch (e->theOper) { case RegExExpr::opNone: { RuleCondn *condn = new RuleCondn(1); condn->add(RuleCondnItem(e->theVal, RegExExpr::opNot)); AclRule *rule = new AclRule(theAction, condn); theRules.append(rule); break; } case RegExExpr::opNot: { Normalizer n(theAction, *e->theLhs, theRules); n.run(); break; } case RegExExpr::opAnd: { RegExExpr notL(e->theLhs, RegExExpr::opNot, 0); RegExExpr notR(e->theRhs, RegExExpr::opNot, 0); RegExExpr x(¬L, RegExExpr::opOr, ¬R); Normalizer n(theAction, x, theRules); n.run(); break; } case RegExExpr::opOr: { RegExExpr notL(e->theLhs, RegExExpr::opNot, 0); RegExExpr notR(e->theRhs, RegExExpr::opNot, 0); RegExExpr x(¬L, RegExExpr::opAnd, ¬R); Normalizer n(theAction, x, theRules); n.run(); break; } default: Assert(false); } } void Normalizer::doOrOp() { Normalizer lhs(theAction, *theExpr.theLhs, theRules); lhs.run(); Normalizer rhs(theAction, *theExpr.theRhs, theRules); rhs.run(); } void Normalizer::doAndOp() { Rules lhsRules; Normalizer lhs(theAction, *theExpr.theLhs, lhsRules); lhs.run(); Rules rhsRules; Normalizer rhs(theAction, *theExpr.theRhs, rhsRules); rhs.run(); //rhsRules.prune(); //lhsRules.prune(); theRules.stretch(theRules.count() + lhsRules.count()*rhsRules.count()); for (int l = 0; l < lhsRules.count(); ++l) { for (int r = 0; r < rhsRules.count(); ++r) { RuleCondn *condn = new RuleCondn; condn->merge(lhsRules[l]->condn()); condn->merge(rhsRules[r]->condn()); AclRule *rule = new AclRule(theAction, condn); theRules.append(rule); } } } void Normalizer::run() { switch (theExpr.theOper) { case RegExExpr::opNone: { doNoneOp(); break; } case RegExExpr::opNot: { doNotOp(); break; } case RegExExpr::opAnd: { doAndOp(); break; } case RegExExpr::opOr: { doOrOp(); break; } default: Assert(false); } } static void normalizeRules(AclRule::Action action, const RegExExpr &expr, Rules &rules) { Normalizer n(action, expr, rules); n.run(); } int main(int argc, char **argv) { CmdLine cmd; cmd.configure(Array() << &TheOpts); if (!cmd.parse(argc, argv) || !TheOpts.validate()) return -1; configureStream(cout, 2); configureStream(clog, 3); GlbPermut().reseed(TheOpts.theGlbRngSeed); clog << argv[0] << ": parsing..." << endl; TheOpts.theCfgDirs.copy(PglPp::TheDirs); PglStaticSemx::Interpret(TheOpts.theCfgName); clog << argv[0] << ": collecting rules..." << endl; // collect all rules Rules rules; for (AgentArrIter ai(PglStaticSemx::TheAgentsToUse, 0, RobotSym::TheType); ai; ai.nextSym()) { const RobotSym &robot = (const RobotSym&)*ai.agentSym(); if (AclSym *acl = robot.acl()) { if (acl->allow()) normalizeRules(AclRule::rlAllow, *acl->allow(), rules); if (acl->deny()) normalizeRules(AclRule::rlDeny, *acl->deny(), rules); if (acl->rewrite()) normalizeRules(AclRule::rlRewrite, *acl->rewrite(), rules); } } // collect all groups //for (int i = 0; i < PglSemx::TheUserGroupsToUse; ++i) // TheGroups.append(PglSemx::TheUserGroupsToUse[i]); clog << argv[0] << ": pruning " << rules.count() << " rules ..." << endl; rules.prune(); // check for conflicts // check for coverage (holes) clog << argv[0] << ": sorting " << rules.count() << " rules ..." << endl; rules.sort(); clog << argv[0] << ": symplifying..." << endl; rules.simplify(); // prune based on order clog << argv[0] << ": printing " << rules.count() << " rules ..." << endl; rules.print(cout); return 0; }