/* Web Polygraph http://www.web-polygraph.org/
* (C) 2003-2006 The Measurement Factory
* Licensed under the Apache License, Version 2.0 */
#include "base/polygraph.h"
#include <limits.h>
#include "xstd/h/math.h"
#include "xstd/h/iomanip.h"
#include "base/ILog.h"
#include "base/OLog.h"
#include "base/Histogram.h"
#include "xstd/gadgets.h"
Histogram::Histogram(): theValMin(-1), theValMax(-1), theBinMax(-1) {
}
Histogram::Histogram(Val aMin, Val aMax, int binCount) {
limits(aMin, aMax, binCount);
}
void Histogram::limits(Val aMin, Val aMax, int binCount) {
theBins.stretch(binCount+2);
theBins.count(binCount+2);
theValMin = aMin;
theValMax = aMax;
theBinMax = binCount+1;
}
void Histogram::reset() {
theStats.reset();
theBins.memset(0);
// do not reset 'limits'
}
void Histogram::record(Val v) {
theStats.record(v);
v -= theValMin;
const int idx = v >= 0 ? Min(val2Bin(v)+1, theBinMax) : 0;
theBins[idx]++;
}
void Histogram::add(const Histogram &h) {
// brand-new histogram can adjust its limits to match h
if (!theStats.count())
limits(h.theValMin, h.theValMax, h.theBinMax - 1);
// histograms must have identical structure
Assert(theValMin == h.theValMin);
Assert(theValMax == h.theValMax);
Assert(theBinMax == h.theBinMax);
Assert(theBins.count() == h.theBins.count());
theStats += h.theStats;
for (int i = 0; i < theBins.count(); ++i)
theBins[i] += h.theBins[i];
}
Histogram &Histogram::operator +=(const Histogram &h) {
add(h);
return *this;
}
OLog &Histogram::store(OLog &log) const {
log << theValMin << theValMax << theBinMax << theStats;
// see if it is more efficient to log busy bins only
int busyCount = 0;
if (theStats.count()) {
for (int i = 0; i < theBins.count(); ++i) {
if (theBins[i])
busyCount++;
}
}
// save space if we save at least 20%
const bool saveSpace = Percent(2*busyCount, theBins.count()) <= 0.80;
log << saveSpace;
if (saveSpace) {
log << theBins.count();
log << busyCount;
// store <pos,bin> pairs
for (int i = 0; i < theBins.count(); ++i) {
if (theBins[i])
log << i << theBins[i];
}
} else {
log << theBins;
}
return log;
}
ILog &Histogram::load(ILog &log) {
int vmin, vmax, bmax;
log >> vmin >> vmax >> bmax >> theStats;
Assert(theValMin == vmin);
Assert(theValMax == vmax);
Assert(theBinMax == bmax);
if (log.getb()) {
const int binCount = log.geti();
const int busyCount = log.geti();
Assert(binCount == theBins.count());
// read <pos,bin> pairs
for (int i = 0; i < busyCount; ++i) {
const int pos = log.geti();
Assert(0 <= pos && pos < theBins.count());
theBins[pos] = log.geti();
}
} else {
log >> theBins;
}
return log;
}
void Histogram::report(double step, ostream &os) const {
const int totCount = stats().count();
Array<HistogramBin> percs;
Percentiles(*this, percs, step);
if (!percs.count())
return;
const int maxNum = percs.last().sup;
const int numLen = maxNum > 1 ? (int)log10(maxNum-1.) : 1;
const int vw = 1 + Max(numLen, 4);
// header
os
<< "# bin"
<< ' ' << setw(vw) << "min"
<< ' ' << setw(vw) << "max"
<< " count % acc% "
<< endl;
int lastIdx = -1;
for (int i = 0; i < percs.count(); ++i) {
const HistogramBin &b = percs[i];
if (b.idx != lastIdx) {
os
<< setw(5) << b.idx
<< ' ' << setw(vw) << b.min
<< ' ' << setw(vw) << b.sup-1
<< ' ' << setw(7) << b.count
<< ' ' << setw(5) << Percent(b.count, totCount)
<< ' ' << setw(6) << Percent(b.accCount, totCount)
<< endl;
lastIdx = b.idx;
}
}
}
ostream &Histogram::print(ostream &os, const String &pfx) const {
theStats.print(os, pfx);
os << pfx << "hist:" << endl;
report(0.01, os);
os << endl;
return os;
}
/* HistogramBin */
HistogramBin &HistogramBin::operator +=(const HistogramBin &b) {
if (b.count) {
if (count) {
count += b.count;
accCount = b.accCount;
sup = b.sup;
// min and idx stay the same
} else {
*this = b;
}
}
return *this;
}
/* HistogramConstIter */
HistogramConstIter::HistogramConstIter(const Histogram &aHist):
theHist(aHist) {
theBin.reset();
sync();
}
HistogramConstIter &HistogramConstIter::operator ++() {
++theBin.idx;
sync();
return *this;
}
// must be called after reset or ++ (and nothing else)
void HistogramConstIter::sync() {
Assert(theBin.idx >= 0);
if (theBin.idx == 0) {
theBin.min = -1; // what else can we put here?
theBin.sup = theHist.theValMin;
} else
if (theBin.idx < theHist.theBinMax) {
theBin.min = theBin.sup;
theBin.sup = theHist.extract(theBin.idx+1);
} else
if (theBin.idx == theHist.theBinMax) {
theBin.min = theBin.sup;
theBin.sup = theHist.theValMax < INT_MAX ? theHist.theValMax+1 : INT_MAX;
} else
return;
theBin.count = theHist.count(theBin.idx);
theBin.accCount += theBin.count;
}
void Percentiles(const Histogram &hist, Array<HistogramBin> &percs, double pStep) {
Assert(!percs.count());
Assert(pStep > 0);
int stepId = 1;
const int totCount = hist.stats().count();
HistogramBin accBin;
int accCount = 0;
for (HistogramConstIter i(hist); i; ++i) {
const HistogramBin &b = *i;
if (b.count) {
accBin += b;
accCount += b.count;
if (accCount/(double)totCount >= stepId*pStep || accCount >= totCount) {
do {
percs.append(accBin);
stepId++;
} while (accCount/(double)totCount >= stepId*pStep);
accBin.reset();
}
}
}
}
syntax highlighted by Code2HTML, v. 0.9.1