/* Web Polygraph http://www.web-polygraph.org/ * (C) 2003-2006 The Measurement Factory * Licensed under the Apache License, Version 2.0 */ #include "pgl/pgl.h" #include "xstd/h/math.h" #include "xstd/TblDistr.h" #include "base/StringArray.h" #include "base/RndPermut.h" #include "pgl/PglIntSym.h" #include "pgl/PglStringSym.h" #include "pgl/PglArraySym.h" #include "xstd/rndDistrs.h" #include "xstd/gadgets.h" String ArraySym::TheType = "Array"; ArraySym::ArraySym(): ContainerSym(TheType), warnedBadProbs(false) { } ArraySym::ArraySym(const String &anItemType): ContainerSym(TheType), theItemType(anItemType), warnedBadProbs(false) { } ArraySym::~ArraySym() { while (theItems.count()) delete theItems.pop(); } bool ArraySym::isA(const String &type) const { if (ContainerSym::isA(type)) return true; if (theItemType) { if (const char *p = type.str("[]")) { return type.cmp(theItemType, p - type.cstr()) == 0; } } return false; } SynSym *ArraySym::dupe(const String &type) const { String itemType; if (const char *p = type.str("[]")) itemType = type(0, p-type.cstr()); else itemType = theItemType; ArraySym *clone = create(itemType); clone->warnedBadProbs = warnedBadProbs; if (clone->append(*this)) return clone; delete clone; return 0; } ArraySym *ArraySym::create(const String &itemType) const { return new ArraySym(itemType); } int ArraySym::count() const { int cnt = 0; for (int i = 0; i < theItems.count(); ++i) { cnt += itemCountAt(i); } return cnt; } const SynSym *ArraySym::item(int idx) const { double prob = -1; return itemProb(idx, prob); } double ArraySym::prob(int idx) const { double p = -1; itemProb(idx, p); Assert(p >= 0); return p; } double ArraySym::explProb(int firstLevelOff) const { return firstLevelOff < theProbs.count() ? theProbs[firstLevelOff] : (double)-1; } // adjusts assigned probability (or -1) into actual probability // computes and uses defaults (if -1) or corrects for user-distribution errors double ArraySym::actualProb(double p) const { Assert(theItems.count()); // collect info about current probs int setCount = 0; double setSum = 0; for (int i = 0; i < theProbs.count(); ++i) { if (theProbs[i] >= 0) { setSum += theProbs[i]; setCount++; } } if (!warnedBadProbs) { if (setSum <= 0.99 && setCount == theItems.count()) { print(cerr << loc(), String()) << endl; cerr << loc() << "explicit item probabilities in the array above add up to " << (100*setSum) << "% (less than 100%) and no wild-card entries are present;" << " missing percents will be spread out proportionally among array entries" << endl; warnedBadProbs = true; Assert(p >= 0); return setSum > 0 ? (p/setSum) : (1.0/setCount); } if (1.01 <= setSum) { print(cerr << loc(), String()) << endl; cerr << loc() << "explicit array probabilities in the array above add up to " << (100*setSum) << "% (more than 100%); " << "explicit probabilities will be adjusted proportionally to specified values" << endl; warnedBadProbs = true; return p >= 0 ? (p/setSum) : 0; } } if (p < 0) return (1.0 - setSum) / (theItems.count() - setCount); return p; } bool ArraySym::append(const ArraySym &arr) { const int oldCount = theItems.count(); for (int i = 0; i < arr.theItems.count(); ++i) { if (!cadd(*arr.theItems[i], arr.explProb(i))) { while (theItems.count() > oldCount) delete theItems.pop(); return false; } } return true; } bool ArraySym::cadd(const SynSym &s, double p) { //if (theItems.count() && s.isA(ContainerSym::TheType) && p < 0) { // cerr << s.loc() << "warning: use binary plus operator to concatenate containers; " // << "do not use [ ..., [nesting], ... ]." << endl; //} if (SynSym *clone = s.clone()) { theItems.append(clone); if (p >= 0) { // fill the gap with default probs as needed theProbs.stretch(theItems.count()); while (theProbs.count() < theItems.count()) theProbs.append(-1); theProbs.last() = p; } return true; } return false; } // unconditional add void ArraySym::add(const SynSym &s, double p) { if (!cadd(s, p)) { Assert(theItemType); // cadd cannot fail otherwise cerr << s.loc() << "cannot add `" << s.type() << "' item to `" << theItemType << "[]' array" << endl << xexit; } } ExpressionSym *ArraySym::bnOper(const Oper &op, const SynSym &exp) const { if (!op.plus()) return ContainerSym::bnOper(op, exp); ArraySym *res = (ArraySym*)clone(); Assert(res); if (exp.isA(TheType)) res->append((const ArraySym&)exp.cast(TheType)); else res->add(exp); return res; } const SynSym *ArraySym::itemProb(int idx, double &prob) const { int offset = 0; for (int i = 0; i < theItems.count(); ++i) { const int cnt = itemCountAt(i); if (offset <= idx && idx < offset + cnt) { const int iOffset = idx - offset; double iProb = -1; const SynSym *s = itemProbAt(i, iOffset, iProb); Assert(iProb >= 0); double assignedProb = -1; if (i < theProbs.count()) assignedProb = theProbs[i]; prob = iProb * actualProb(assignedProb); return s; } offset += cnt; } Assert(false); return 0; } int ArraySym::itemCountAt(int idx) const { if (theItems[idx]->isA(ContainerSym::TheType)) { return ((const ContainerSym&)theItems[idx]->cast(ContainerSym::TheType)).count(); } else { return 1; } } bool ArraySym::itemProbsSetAt(int idx) const { if (theItems[idx]->isA(ContainerSym::TheType)) return ((const ContainerSym&)theItems[idx]->cast(ContainerSym::TheType)).probsSet(); else return false; } const SynSym *ArraySym::itemProbAt(int idx, int offset, double &prob) const { if (theItems[idx]->isA(ContainerSym::TheType)) return ((const ContainerSym&)theItems[idx]->cast(ContainerSym::TheType)).itemProb(offset, prob); prob = 1; return theItems[idx]; } void ArraySym::copyProbs(Array &res) const { const int cnt = count(); res.stretch(cnt); for (int i = 0; i < cnt; ++i) res.append(prob(i)); } bool ArraySym::probsSet() const { if (theProbs.count()) return true; for (int i = 0; i < theItems.count(); ++i) { if (itemProbsSetAt(i)) return true; } return false; } ostream &ArraySym::print(ostream &os, const String &pfx) const { os << '['; for (int i = 0; i < theItems.count(); ++i) { if (i) os << ", "; theItems[i]->print(os, pfx); const double p = explProb(i); if (p >= 0) os << ": " << (100*p) << '%'; } os << ']'; return os; } // uses a table distribution to model item probabilities // selector is a distribution of item positions // XXX: if no probs specified, we use uniform distribution to save memory // ideally, table distr should be smart enough to detect this case RndDistr *ArraySym::makeSelector(const String &name) { if (!probsSet()) { RndGen *rng = new RndGen(GlbPermut(count(), rndArraySymSelector)); return new UnifDistr(rng, 0, count()); } Array probs; copyProbs(probs); return TblDistr::FromDistrTable(name, probs); } void ArraySym::toStringArray(StringArray &strs) const { for (int i = 0; i < theItems.count(); ++i) { if (theItems[i]->isA(ContainerSym::TheType)) { const ContainerSym &c = (const ContainerSym&)theItems[i]->cast(ContainerSym::TheType); c.toStringArray(strs); } else { const StringSym &s = (const StringSym&)theItems[i]->cast(StringSym::TheType); strs.append(s.val()); } } }