/* Web Polygraph       http://www.web-polygraph.org/
 * (C) 2003-2006 The Measurement Factory
 * Licensed under the Apache License, Version 2.0 */

#include "base/polygraph.h"

#include <limits.h>

#include "xstd/h/iostream.h"
#include <fstream>
#include "xstd/h/iomanip.h"

#include "xstd/Rnd.h"
#include "base/RndPermut.h"
#include "xstd/Ring.h"
#include "tools/IntIntHash.h"
#include "base/CmdLine.h"
#include "base/opts.h"
#include "base/polyOpts.h"
#include "xstd/gadgets.h"


class MyOpts: public OptGrp {
	public:
		MyOpts():
			theHelpOpt(this,      "help",             "list of options"),
			theVersOpt(this,      "version",          "package version info"),
			theOutFileName(this,  "out <file>",       "redirect console output", "-"),
			theCachabRatio(this,  "cachable <%>",     "portion of cachable replies", 0.80),
			thePublicRatio(this,  "public_interest <%>", "portion of URLs shared among all robots", 0.50),
			theRecurrRatio(this,  "recurrence <%>",   "probability of a re-visit to a URL", 0.55/0.80),
			theWorkSetSize(this,  "work_set_size <size>","working set size"),
			theCacheSize(this,    "cache_size <size>","cache size", BigSize::MB(100)),
			theObjSize(this,      "obj_size <size>",  "average object size", Size::KB(13)),
			theRobCount(this,     "robots <int>",     "total number of robots to simulate", 1),
			thePopModel(this,     "pop_model <str>",  "popularity model", "unif"),
			theSimLength(this,    "sim_length <int>", "total number of request to simulate", 50000000)
			{}

		virtual bool validate() const;

	public:
		HelpOpt theHelpOpt;
		VersionOpt theVersOpt;
		StrOpt theOutFileName;
		DblOpt theCachabRatio;
		DblOpt thePublicRatio;
		DblOpt theRecurrRatio;
		BigSizeOpt theWorkSetSize;
		BigSizeOpt theCacheSize;
		SizeOpt theObjSize;
		IntOpt theRobCount;
		StrOpt thePopModel;
		IntOpt theSimLength;
} TheOpts;

class Robot {
	public:
		Robot(int anOidOff);

		void step();

	protected:
		int genOid();

	protected:
		int theOidOff;
		int thePrivOidCnt;
};

class Cache {
	public:
		Cache(int aCapacity);

		double dhp() const { return Percent(theHitCount, theReqCount); }
		double intervalDhp();
		double utilp() const { return Percent(theSize, theCapacity); }
		bool full() const { return theSize >= theCapacity; }
		int capacity() const { return theCapacity; }
		int fill() const { return theFill; }
		int reqs() const { return theReqCount; }
		const IntIntHash &hash() const { return theIndex; }

		void noteObject(int oid);

	protected:
		void purge();

	protected:
		int theCapacity;
		int theSize;
		int theFill;

		Ring<int> theRepPolicy;
		IntIntHash theIndex;

		int theHitCount;
		int theReqCount;

		int theIntvlHitCount;
		int theIntvlReqCount;
};


static
struct Server {
	int theLastOid;
} TheServer;


typedef int (*PopModelPtr)(RndGen &rng, int lastOid);
static int PopModel(RndGen &rng, int lastOid, int wsCap);

static Array<Robot*> TheRobots;
static Cache *TheCache = 0;
static int TheOidLmt = -1; // limit for private and shared oids

static int TheTotlWorkSetCap = -1;
static int ThePrivWorkSubsetCap = -1;
static int TheShrdWorkSubsetCap = -1;

static PopModelPtr ThePopModel = 0;
static double ThePopModelParam = 0;


bool MyOpts::validate() const {
	if (theWorkSetSize <= 0)
		cerr << "working set size must be specified" << endl;
	else
		return true;
	return false;
}


/* Robot */

Robot::Robot(int anOidOff):	theOidOff(anOidOff), thePrivOidCnt(0) {
}

// mimics Client::genOid
int Robot::genOid() {
	static RndGen rng;
	const bool publicOid = rng() < TheOpts.thePublicRatio;
	const bool repeatOid = rng() < TheOpts.theRecurrRatio;

	if (publicOid) {
		if (repeatOid && TheServer.theLastOid > 0) {
			return PopModel(rng, TheServer.theLastOid, TheShrdWorkSubsetCap);
		}
		Assert(TheServer.theLastOid < TheOidLmt);
		return ++TheServer.theLastOid;
	}

	if (repeatOid && thePrivOidCnt > 0)
		return theOidOff + PopModel(rng, thePrivOidCnt, ThePrivWorkSubsetCap);

	Assert(thePrivOidCnt < TheOidLmt);
	return theOidOff + (++thePrivOidCnt);
}

void Robot::step() {
	const int oid = genOid();
	TheCache->noteObject(oid);
}


/* Cache */

Cache::Cache(int aCapacity): theCapacity(aCapacity), theSize(0), theFill(0),
	theRepPolicy(4*aCapacity), theIndex(aCapacity),
	theHitCount(0), theReqCount(0), theIntvlHitCount(0), theIntvlReqCount(0) {
}

void Cache::noteObject(int oid) {
	Assert(oid > 0);
	theReqCount++;
	theIntvlReqCount++;

	RndGen rng(LclPermut(oid));
	if (rng() > TheOpts.theCachabRatio)
		return; // uncachable object

	IntIntHash::Loc loc;
	if (theIndex.find(oid, loc)) {
		theHitCount++;
		theIntvlHitCount++;
		theIndex[loc]++;
	} else {
		const bool wasFull = full();
		theIndex.addAt(loc, oid, 1);
		theSize++;
		theFill++;

		if (wasFull)
			purge();
	}

	Assert(!theRepPolicy.full());
	theRepPolicy.enqueue(oid);
}

void Cache::purge() {
	Assert(theSize > 0);
	while (1) {
		Assert(!theRepPolicy.empty());
		const int oid = theRepPolicy.dequeue();
		IntIntHash::Loc loc;
		Assert(theIndex.find(oid, loc));
		int &ttl = theIndex[loc];
		Assert(ttl > 0);
		if (--ttl == 0) {
			theIndex.delAt(loc);
			break;
		}
	}
	theSize--;
}

double Cache::intervalDhp() {
	const double res = Percent(theIntvlHitCount, theIntvlReqCount);
	theIntvlHitCount = theIntvlReqCount = 0;
	return res;
}

static
void configureLogs(int prec) {
	if (TheOpts.theOutFileName && TheOpts.theOutFileName != "-")
		redirectOutput(TheOpts.theOutFileName.cstr());

	configureStream(cout, prec);
	configureStream(cerr, prec);
	configureStream(clog, prec);
}


/* Pop Models */

static
int UnifPopModel(RndGen &rng, int lastOid) {
	return 1 + rng(0, lastOid);
}


int Zipf(double alpha, RndGen &rng, int lastOid) {
	const double rn = rng();
	return (int)pow(lastOid+1, pow(rn,alpha));
}

static
int ZipfPopModel(RndGen &rng, int lastOid) {
	return 1 + lastOid - Zipf(ThePopModelParam, rng, lastOid);
}

inline double logd(double x) { return log(x); }

static
int ZipdPopModel(RndGen &rng, int lastOid) {
	if (lastOid == 1 || ThePopModelParam >= 1)
		return lastOid;
	const double alpha = logd(logd(2)/logd(lastOid+1)) / logd(ThePopModelParam);
	return 1 + lastOid - Zipf(alpha, rng, lastOid);
}

static
int PopModel(RndGen &rng, int lastOid, int wsCap) {
	const int offset = lastOid > wsCap ? lastOid-wsCap : 0;
	return offset + ThePopModel(rng, lastOid-offset);
}

// set some general stuff and
// propogate cmd line options to corresponding objects
static
void configure() {
	configureLogs(2);

	// this is total work set size
	TheTotlWorkSetCap = 1 + (int) (TheOpts.theWorkSetSize / BigSize(TheOpts.theObjSize));

	// compute private work subsets for robots and "shared" subset
	ThePrivWorkSubsetCap = 1 + (int)((1-TheOpts.thePublicRatio)*TheTotlWorkSetCap/(int)TheOpts.theRobCount);
	TheShrdWorkSubsetCap = 1 + (int)(TheOpts.thePublicRatio*TheTotlWorkSetCap);
	
	// note: we do not adjust subsets for uncachable objects because
	// robots have no idea and do not care what is cachable

	String pmName = TheOpts.thePopModel;
	if (const char *p = pmName.chr(':')) {
		isNum(p+1, ThePopModelParam);
		pmName = pmName(0, p-pmName.cstr());
	}

	if (TheOpts.thePopModel == "unif") {
		ThePopModel = &UnifPopModel;
	} else 
	if (pmName == "zipf") {
		ThePopModel = &ZipfPopModel;
		if (ThePopModelParam <= 0)
			ThePopModelParam = 1;
	} else
	if (pmName == "zipd") {
		ThePopModel = &ZipdPopModel;
		if (ThePopModelParam <= 0)
			ThePopModelParam = 0.5/100;
	} else {
		cerr << "unknown popularity model `" << TheOpts.thePopModel << "'" << endl;
		exit(-1);
	}

	const int objInCache = 1 + (int) (TheOpts.theCacheSize / BigSize(TheOpts.theObjSize));
	TheCache = new Cache(objInCache);

	// oid limits
	TheOidLmt = INT_MAX / (1 + (int)TheOpts.theRobCount);

	for (int i = 0; i < TheOpts.theRobCount; ++i)
		TheRobots.append(new Robot((i+1)*TheOidLmt));
}

static
void report() {
	static int repCnt = 0;
	if (!repCnt++) {
		cout << '#'
			<< ' ' << setw(8) << "reqs"
			<< ' ' << setw(8) << "fill#"
			<< ' ' << setw(8) << "fill%"
			<< ' ' << setw(6) << "DHRi"
			<< ' ' << setw(6) << "DHR"
			<< endl;
	}

	cout << 'i'
		<< ' ' << setw(8) << TheCache->reqs()
		<< ' ' << setw(8) << TheCache->fill()
		<< ' ' << setw(8) << (int)Percent(TheCache->fill(), TheCache->capacity())
		<< ' ' << setw(6) << TheCache->intervalDhp()
		<< ' ' << setw(6) << TheCache->dhp()
		<< endl;
}

static
void run() {
	static RndGen rng;

	bool full = TheCache->full();
	if (!full)
		cout << "# filling the cache..." << endl;

	const int repCycle = Max(1, TheOpts.theSimLength/1000, TheCache->capacity() / 20);
	for (int i = TheOpts.theSimLength; i; --i) {
		// select a random robot
		Robot *robot = TheRobots[rng(0, TheRobots.count())];
		robot->step();

		if ((full && abs(i) % repCycle == 1) || (!full && TheCache->full()))
			report();

		full = TheCache->full();
	}
}

int main(int argc, char *argv[]) {
	CmdLine cmd;
	cmd.configure(Array<OptGrp*>() << &TheOpts);
	if (!cmd.parse(argc, argv) || !TheOpts.validate())
		return -1;

	configure();
	cmd.report(cout);

	cout << "# " << TheOpts.theCacheSize << " cache fits " << TheCache->capacity() << " objects" << endl;
	cout << "# " << "working set is " << TheOpts.theWorkSetSize << " or " << TheTotlWorkSetCap << " objects" << endl;
	cout << "# " << "working set split: " << TheRobots.count() << " * " << ThePrivWorkSubsetCap << " + " << TheShrdWorkSubsetCap << " objects" << endl;
	cout << "# " << "server and robot world limit is " << TheOidLmt << " oids each" << endl;

	run();

	return 0;
}


syntax highlighted by Code2HTML, v. 0.9.1