/* Web Polygraph       http://www.web-polygraph.org/
 * (C) 2003-2006 The Measurement Factory
 * Licensed under the Apache License, Version 2.0 */

#include "base/polygraph.h"

#include "xstd/String.h"
#include "xstd/Clock.h"
#include "xstd/StrIdentifier.h"
#include "xstd/gadgets.h"
#include "base/OLog.h"
#include "base/AddrParsers.h"
#include "runtime/XactAbortCoord.h"
#include "runtime/HttpCookies.h"
#include "runtime/HttpDate.h"
#include "runtime/httpText.h"
#include "runtime/httpHdrs.h"


/* Internal holder of static parsing info: a string identifier table plus
 * an array of heap-allocated parser (member-function) pointers.
 * The array is indexed by the ids that the identifier table hands out. */
class MsgHdrParsTab {
	public:
		MsgHdrParsTab();   // allocates both tables
		~MsgHdrParsTab();  // frees both tables and every stored parser

		StrIdentifier *ids;                // field/method name -> id
		Array<MsgHdr::Parser*> *parsers;   // id -> parser
};


// Parsing tables shared by all request/reply headers; they start out null,
// are allocated by ReqHdr::Configure()/RepHdr::Configure(), and are freed
// by the matching Clean().
MsgHdrParsTab *ReqHdr::TheParsTab = 0;
MsgHdrParsTab *RepHdr::TheParsTab = 0;


/* MsgHdr */

// Binds the header to the given parsing table (kept by reference) and
// puts all parsed-state members into their initial values via reset().
MsgHdr::MsgHdr(const MsgHdrParsTab &aTab): theParsTab(aTab) {
	reset();
}

// Returns the header to its just-constructed state so that the object can
// be reused for parsing another message. The parsing table is left alone.
void MsgHdr::reset() {
	// incremental end-of-headers search state
	theBufBeg = 0;
	theBufEnd = 0;
	theSrchPtr = 0;
	theSrchState = ssFirst;
	theFields.reset();

	// sizes and simple flags
	theHdrSize = 0;
	theContSize = -1;
	thePhaseSyncPos = -1;
	isCachable = true;
	theConnectionKeepAlive = kaDefault;
	theContType = ctUnknown;
	theTransferEncoding = tcNone;

	// values extracted from individual header fields
	theHttpVersion.reset();
	theChecksum.reset();
	theDate = Time();
	theGroupId.clear();
	theXactId.clear();
	theTarget = NetAddr();
	theRemWorld.reset();
	theAbortCoord.reset();

	// do not reset parsing tables
}

bool MsgHdr::markupContent() const {
	return theContType == ctMarkup;	
}

// True once a content type has been recorded (anything but ctUnknown).
bool MsgHdr::knownContentType() const {
	const bool isKnown = (theContType != ctUnknown);
	return isKnown;
}

bool MsgHdr::chunkedEncoding() const {
	return theTransferEncoding == tcChunked;
}

// Decides whether the connection should be kept open after this message.
// Versions up to 1.0 keep it only when explicitly told so; later versions
// keep it unless told otherwise.
bool MsgHdr::persistentConnection() const {
	const bool oldProtocol = theHttpVersion <= HttpVersion(1,0);
	return oldProtocol ?
		theConnectionKeepAlive == kaYes :
		theConnectionKeepAlive != kaNo;
}

// Incrementally searches [buf, buf+sz) for the end of the message headers
// (an LF followed, after optional CRs, by another LF). While scanning, the
// start of every line that follows an LF is remembered in theFields; the
// very first line is not recorded there.
// Returns false if the end of headers has not been seen yet; the caller may
// call again with the same buf (asserted) and a larger sz, and the search
// resumes from where it stopped. Returns true after the end of headers is
// found and parseFields() has been run.
// note: buf does not have to be zero-terminated!
bool MsgHdr::parse(const char *buf, Size sz) {
	if (!theBufBeg) { // have not started the search yet
		theBufBeg = theSrchPtr = buf;
		Assert(theSrchState == ssFirst);
	} else {           // continue search
		Assert(theBufBeg == buf);
		Assert(theSrchState != ssFound);
	}

	theBufEnd = buf + sz; // to be refined later
	while (theSrchPtr < theBufEnd && theSrchState != ssFound) {
		// search for LF
		if (theSrchState == ssFirst) {
			do {
				// the pointer is advanced past the LF before switching state
				if (*theSrchPtr++ == '\n') {
					theSrchState = ssSkip;
					break;
				}
			} while (theSrchPtr < theBufEnd);
		}

		// LF after skipping optional CRs means end-of-headers
		while (theSrchState == ssSkip && theSrchPtr < theBufEnd) {
			if (*theSrchPtr == '\n') {
				theSrchState = ssFound;
			} else
			if (*theSrchPtr != '\r') {
				theFields.append(theSrchPtr); // start of a header!
				theSrchState = ssFirst;
			}
			// CRs are skipped silently; the terminating LF is consumed too
			++theSrchPtr;
		}
	}

	if (theSrchState != ssFound)
		return false;

	// found end-of-headers!
	// theSrchPtr now points just past the terminating LF
	theBufEnd = theSrchPtr;
	theHdrSize = theBufEnd - theBufBeg;

	// now parse known fields
	// luckily, we already know field starts!
	parseFields();
	return true;
}

// Parses the first (request/status) line and then every header field whose
// name is present in the parsing table. Field starts were collected by
// parse(); fields are visited last-to-first so that the start of field i+1
// can serve as the (approximate) end of field i.
void MsgHdr::parseFields() {
	const char *eoh = theBufEnd;
	// skip end-of-headers CRLF
	while (theBufBeg < eoh && eoh[-1] == '\n') --eoh;
	while (theBufBeg < eoh && eoh[-1] == '\r') --eoh;

	// the first line ends where the first header field starts (if any)
	parseRLine(theBufBeg, theFields.count() ? theFields[0] : eoh);

	for (int i = theFields.count()-1; i >= 0; --i) {
		const char *hdr = theFields[i];
		const int len = eoh-hdr; // approximate (includes crlfs)
		const int id = theParsTab.ids->lookup(hdr, len);
		if (id > 0) {
			const char *val = hdr + theParsTab.ids->string(id).len();
			// Skip leading whitespace, but never beyond this field: the
			// buffer is not zero-terminated and an empty value is followed
			// by CRLF, so an unbounded skip would run into the next field
			// (or past the buffer) and hand the parser val > eoh.
			// The cast avoids passing a negative char to isspace().
			while (val < eoh && isspace(static_cast<unsigned char>(*val)))
				++val;
			Parser p = *theParsTab.parsers->item(id);
			(this->*p)(val, eoh);
		}
		eoh = hdr; // the previous field ends where this one starts
	}
}

// Parses a "major.minor" version starting at beg. On success stores the
// version in v, moves beg to where the minor number parsing stopped, and
// returns true; otherwise returns false and leaves beg and v untouched.
bool MsgHdr::parseHttpVersion(const char *&beg, const char *end, HttpVersion &v) {
	const char *cur = 0;
	int major = -1, minor = -1;

	if (!isInt(beg, major, &cur))
		return false;
	if (!(cur+1 < end) || *cur != '.') // need a dot and something after it
		return false;
	if (!isInt(cur+1, minor, &cur))
		return false;

	v = HttpVersion(major, minor);
	beg = cur;
	return true;
}

// Parses a unique id written as three base-16 integers, "A.B:C".
// The third isInt() call advances buf itself. Returns true only if all
// three numbers parsed and the resulting id compares unequal to zero.
// The end-of-input argument is not used.
bool MsgHdr::parseUniqId(const char *&buf, const char *, UniqId &id) {
	int first = 0, second = 0, third = 0;
	const char *cur = 0;

	if (!isInt(buf, first, &cur, 16) || *cur != '.')
		return false;
	if (!isInt(cur+1, second, &cur, 16) || *cur != ':')
		return false;
	if (!isInt(cur+1, third, &buf, 16))
		return false;

	return (id = UniqId(first, second, third)) != 0;
}

// Parses a world description of the form "<uniq-id> <wss>/<size> <hotPos>"
// and, only when every component parsed, stores the values in world.
bool MsgHdr::parseWorld(const char *buf, const char *eoh, ObjWorld &world) {
	UniqId id;
	if (!parseUniqId(buf, eoh, id))
		return false;
	if (*buf != ' ')
		return false;

	int size = -1, wss = -1, hotPos = -1;
	const char *cur = buf; // currently at the space after the id

	if (!isInt(cur+1, wss, &cur) || *cur != '/')
		return false;
	if (!isInt(cur+1, size, &cur) || *cur != ' ')
		return false;
	if (!isInt(cur+1, hotPos))
		return false;

	world.id(id);
	world.size(size);
	world.wss(wss);
	world.hotPos(hotPos);
	return true;
}

// Wrapper around SkipHostInUri(): when it returns a non-null position,
// start is moved there and true is returned; otherwise start is left
// untouched and false is returned.
bool MsgHdr::ParseHostInUri(const char *&start, const char *eorl, NetAddr &host) {
	const char *const afterHost = SkipHostInUri(start, eorl, host);
	if (!afterHost)
		return false;

	start = afterHost;
	return true;
}

// Parses a URI starting at buf and fills uri: the host (when the URI does
// not start with '/'), the path position/length, and the object id parts
// (world id after 'w', type after 't', name after '_', all base 16).
// buf is advanced to the first whitespace character (or end) after the URI.
// If any of the object id parts is missing, the whole URI text is recorded
// as a foreign URL. Always returns true.
bool MsgHdr::parseUri(const char *&buf, const char *end, HttpUri &uri) {
	const char *uriStart = buf;

	// see if there is a protocol://host prefix
	if (*buf != '/')
		ParseHostInUri(buf, end, uri.host);

	uri.pathBuf = buf; // includes leading '/'

	// user-specified url_prefix may contain world-id tag;
	// search for the first tag that is followed by a valid wid
	// (buf is moved just past each candidate 'w' before the id is tried)
	while (const char *wid = StrBoundChr(buf, 'w', end)) {
		if (parseUniqId(buf = wid+1, end, uri.oid.world()))
			break;
	}

	// type and name tags are searched from wherever buf was left above
	if (const char *tid = StrBoundChr(buf, 't', end))
		isInt(tid + 1, uri.oid.type(), &buf, 16);
	if (const char *oid = StrBoundChr(buf, '_', end))
		isInt(oid + 1, uri.oid.name(), &buf, 16);

	// find the end of the Uri
	while (buf < end && !isspace(*buf))
		++buf;

	uri.pathLen = buf - uri.pathBuf;
	// an incomplete object id means the URL was not generated by us
	if (!uri.oid.world() || uri.oid.type() < 0 || uri.oid.name() < 0)
		uri.oid.foreignUrl(String(uriStart, buf - uriStart));
	return true;
}

// Stores the content length parsed by xatoi() (which is given -1 as its
// second argument) and reports whether the stored value is non-negative.
bool MsgHdr::parseContLen(const char *buf, const char *) {
	theContSize = xatoi(buf, -1);
	const bool valid = theContSize >= 0;
	return valid;
}

// Base64-decodes the field value into the checksum buffer. The checksum is
// marked as set only if the decoder reports exactly theChecksum.size().
bool MsgHdr::parseContMd5(const char *buf, const char *eoh) {
	if (DecodeBase64(buf, eoh - buf, theChecksum.buf(), theChecksum.size()) != theChecksum.size())
		return false;

	theChecksum.set(true);
	return true;
}

// Classifies the content type: "text/" subtypes matching "??ml" (e.g.,
// html), "?ml" (e.g., xml), or "css" are treated as markup; everything
// else is ctOther. Always returns true.
// All comparisons are bounded by eoh because the buffer is not
// zero-terminated and the value may end exactly at eoh.
bool MsgHdr::parseContType(const char *buf, const char *eoh) {
	theContType = ctOther; // default
	if (buf+5 <= eoh && strncasecmp(buf, "text/", 5) == 0) {
		buf += 5;
		// four-letter subtype ending with "ml"
		if (buf+4 <= eoh && strncasecmp(buf+2, "ml", 2) == 0)
			theContType = ctMarkup;
		else
		// three-letter subtype ending with "ml": only 3 bytes are read,
		// so require 3 bytes (requiring 4 missed a value ending at eoh)
		if (buf+3 <= eoh && strncasecmp(buf+1, "ml", 2) == 0)
			theContType = ctMarkup;
		else
		if (buf+3 <= eoh && strncasecmp(buf, "css", 3) == 0)
			theContType = ctMarkup;
	}

	return true;
}

// Stores the result of HttpDateParse() as the message date and reports
// whether the stored time is non-negative.
bool MsgHdr::parseDate(const char *buf, const char *eoh) {
	theDate = HttpDateParse(buf, eoh - buf);
	const bool valid = theDate >= 0;
	return valid;
}

// Marks the message as not cachable when the value starts with "no-cache"
// (case-insensitive); any other value is reported as unrecognized.
bool MsgHdr::parsePragma(const char *buf, const char *) {
	if (strncasecmp("no-cache", buf, 8) != 0)
		return false;

	isCachable = false;
	return true;
}

// Marks the message as not cachable when the value starts with "no-cache"
// (case-insensitive). Only the beginning of the value is examined.
bool MsgHdr::parseCControl(const char *buf, const char *) {
	const bool noCache = strncasecmp("no-cache", buf, 8) == 0;
	if (noCache)
		isCachable = false;
	return noCache;
}

// Parses "<group-id> <xact-id>": two unique ids separated by one space.
bool MsgHdr::parseXXact(const char *buf, const char *eoh) {
	if (!parseUniqId(buf, eoh, theGroupId))
		return false;
	if (*buf != ' ')
		return false;

	++buf; // step over the separating space
	return parseUniqId(buf, eoh, theXactId);
}

bool MsgHdr::parseXRemWorld(const char *buf, const char *eoh) {
	return parseWorld(buf, eoh, theRemWorld);
}

// Parses "<whether> <where>" (two integers separated by one space) and
// configures the abort coordinates. Both numbers must parse and be
// non-zero; otherwise nothing is configured and false is returned.
bool MsgHdr::parseXAbort(const char *buf, const char *) {
	int whether = 0;
	int where = 0;
	const char *cur = 0;

	if (!isInt(buf, whether, &cur) || *cur != ' ')
		return false;
	if (!isInt(cur+1, where))
		return false;
	if (!whether || !where)
		return false;

	theAbortCoord.configure(whether, where);
	return true;
}

// Stores the phase sync position parsed by xatoi() (which is given 0 as
// its second argument). Never fails.
bool MsgHdr::parseXPhaseSyncPos(const char *buf, const char *) {
	const int pos = xatoi(buf, 0);
	thePhaseSyncPos = pos;
	return true;
}

// Parses the field value as a network address into theTarget.
bool MsgHdr::parseXTarget(const char *buf, const char *eoh) {
	const bool parsed = ParseNetAddr(buf, eoh, theTarget);
	return parsed;
}

/* XXX: Connection and other headers may have a _list_ of options */

// Recognizes values starting with "close" or "keep" (case-insensitive) and
// records the matching keep-alive preference; anything else is rejected.
bool MsgHdr::parseConnection(const char *buf, const char *) {
	if (strncasecmp("close", buf, 5) == 0) {
		theConnectionKeepAlive = kaNo;
		return true;
	}

	if (strncasecmp("keep", buf, 4) == 0) {
		theConnectionKeepAlive = kaYes;
		return true;
	}

	return false;
}

// Classifies the transfer coding by its (case-insensitive) prefix:
// "chunked", "identity", or other. Always returns true.
bool MsgHdr::parseTransferEncoding(const char *buf, const char *) {
	if (strncasecmp("chunked", buf, 7) == 0) {
		theTransferEncoding = tcChunked;
		return true;
	}

	if (strncasecmp("identity", buf, 8) == 0) {
		theTransferEncoding = tcIdentity;
		return true;
	}

	theTransferEncoding = tcOther;
	return true;
}

// adds definitions common to replies and requests
// Each call registers a field name in tab and associates the parser with
// the id that the name receives (see AddParser).
void MsgHdr::Configure(MsgHdrParsTab &tab) {
	AddParser(hfpDate, &MsgHdr::parseDate, tab);
	AddParser(hfpContLength, &MsgHdr::parseContLen, tab);
	AddParser(hfpContMd5, &MsgHdr::parseContMd5, tab);
	AddParser(hfpContType, &MsgHdr::parseContType, tab);
	AddParser(hfpCacheControl, &MsgHdr::parseCControl, tab);
	AddParser(hfpConnection, &MsgHdr::parseConnection, tab);
	AddParser(hfpPragma, &MsgHdr::parsePragma, tab);
	// same parser as for hfpConnection
	AddParser(hfpProxyConnection, &MsgHdr::parseConnection, tab);
	AddParser(hfpTransferEncoding, &MsgHdr::parseTransferEncoding, tab);
	AddParser(hfpXXact, &MsgHdr::parseXXact, tab);
	AddParser(hfpXRemWorld, &MsgHdr::parseXRemWorld, tab);
	AddParser(hfpXAbort, &MsgHdr::parseXAbort, tab);
	AddParser(hfpXPhaseSyncPos, &MsgHdr::parseXPhaseSyncPos, tab);
	AddParser(hfpXTarget, &MsgHdr::parseXTarget, tab);
}

int MsgHdr::AddParser(const String &field, Parser parser, MsgHdrParsTab &where) {
	Assert(field);
	const String trimmedField = isspace(field.last()) ? field(0, field.len()-1) : field;
	Assert(trimmedField);

	const int id = where.ids->add(trimmedField);
	where.parsers->put(new Parser(parser), id);
	return id;
}

// Writes the common header state to the log. The order of the items below
// is the stored layout and must not change.
void MsgHdr::store(OLog &log) const {
	log << theHdrSize;
	log << (int)theDate.sec();
	log << theContSize;
	// log << theChecksum;
	log << theGroupId;
	log << theXactId;
	log << theTarget;
	log << theHttpVersion.vMinor(); // XXX: log major too
	log << (int)theConnectionKeepAlive;
	log << isCachable;
	// XXX: not stored or loaded: theTransferEncoding, theContType
}

// these should never be called
// MsgHdr-level placeholders for parsers that ReqHdr or RepHdr implement;
// the Configure() methods register them via &MsgHdr::... member pointers.
// Reaching one of these trips Assert(0).
inline bool dontCallMe() { Assert(0); return false; }
bool MsgHdr::parseGetReqLine(const char *, const char *) { return dontCallMe(); }
bool MsgHdr::parseHeadReqLine(const char *, const char *) { return dontCallMe(); }
bool MsgHdr::parsePostReqLine(const char *, const char *) { return dontCallMe(); }
bool MsgHdr::parsePutReqLine(const char *, const char *) { return dontCallMe(); }
bool MsgHdr::parseHost(const char *, const char *) { return dontCallMe(); }
bool MsgHdr::parseServer(const char *, const char *) { return dontCallMe(); }
bool MsgHdr::parseLocation(const char *, const char *) { return dontCallMe(); }
bool MsgHdr::parseLMT(const char *, const char *) { return dontCallMe(); }
bool MsgHdr::parseExpires(const char *, const char *) { return dontCallMe(); }
bool MsgHdr::parseIms(const char *, const char *) { return dontCallMe(); }
bool MsgHdr::parseAcceptEncoding(const char *, const char *) { return dontCallMe(); }
bool MsgHdr::parseXLocWorld(const char *, const char *) { return dontCallMe(); }
bool MsgHdr::parseCookie(const char *, const char *) { return dontCallMe(); }


/* ReqHdr */

// Uses the request parsing table; TheParsTab is dereferenced here, so
// ReqHdr::Configure() must have been called before any ReqHdr is created.
ReqHdr::ReqHdr(): MsgHdr(*TheParsTab), isHealthCheck(false),
	isAcceptingGzip(false) {
}

// Resets the common header state and then the request-specific members.
void ReqHdr::reset() {
	MsgHdr::reset();

	// request-only flags
	isAcceptingGzip = false;
	isHealthCheck = false;

	// request-only parsed values
	theLocWorld.reset();
	theIms = Time();
	theUri = HttpUri();
}

// Parses the request line in [buf, eorl): looks up the request method in
// the parsing table and hands the rest of the line to the parser that was
// registered for it. Returns false for unknown methods.
bool ReqHdr::parseRLine(const char *buf, const char *eorl) {
	const int id = theParsTab.ids->lookup(buf, eorl - buf);
	if (id > 0) {
		buf += theParsTab.ids->string(id).len();
		// Skip whitespace after the method, but stay within the request
		// line: the buffer is not zero-terminated, so an unbounded skip
		// could run past eorl. The cast avoids passing a negative char
		// to isspace().
		while (buf < eorl && isspace(static_cast<unsigned char>(*buf)))
			++buf;

		Parser p = *theParsTab.parsers->item(id);
		return (this->*p)(buf, eorl);
	}
	return false;
}

// Method-independent part of request line parsing: detects the health
// check URI, parses the URI, and then tries to parse the protocol version
// that follows the next space. Always returns true.
bool ReqHdr::parseAnyReqLine(const char *buf, const char *eorl) {
	// a "well-known" health check uri
	static const String health = "/health";
	isHealthCheck = health.casePrefixOf(buf, eorl-buf);

	parseUri(buf, eorl, theUri);

	const char *const space = StrBoundChr(buf, ' ', eorl);
	if (!space)
		return true; // no protocol part

	// optimization: not checking for "HTTP/" match
	const char *version = space + 6; // the space plus five more characters
	if (version < eorl)
		parseHttpVersion(version, eorl, theHttpVersion);
	return true;
}

// Parses the rest of the request line and flags the object id as "get".
bool ReqHdr::parseGetReqLine(const char *buf, const char *eorl) {
	if (!parseAnyReqLine(buf, eorl))
		return false;

	theUri.oid.get(true);
	return true;
}

// Parses the rest of the request line and flags the object id as "head".
bool ReqHdr::parseHeadReqLine(const char *buf, const char *eorl) {
	if (!parseAnyReqLine(buf, eorl))
		return false;

	theUri.oid.head(true);
	return true;
}

// Parses the rest of the request line and flags the object id as "post".
bool ReqHdr::parsePostReqLine(const char *buf, const char *eorl) {
	if (!parseAnyReqLine(buf, eorl))
		return false;

	theUri.oid.post(true);
	return true;
}

// Parses the rest of the request line and flags the object id as "put".
bool ReqHdr::parsePutReqLine(const char *buf, const char *eorl) {
	if (!parseAnyReqLine(buf, eorl))
		return false;

	theUri.oid.put(true);
	return true;
}

// Parses the field value as a network address into the URI host.
bool ReqHdr::parseHost(const char *buf, const char *eoh) {
	const bool parsed = ParseNetAddr(buf, eoh, theUri.host);
	return parsed;
}

// Stores the result of HttpDateParse() as the If-Modified-Since time and
// reports whether the stored time is non-negative.
bool ReqHdr::parseIms(const char *buf, const char *eoh) {
	theIms = HttpDateParse(buf, eoh - buf);
	const bool valid = theIms >= 0;
	return valid;
}

// Records whether gzip is acceptable: the value contains either '*' or
// the string "gzip". Always returns true.
bool ReqHdr::parseAcceptEncoding(const char *buf, const char *eoh) {
	// XXX: these checks ignore "q=0" preferences
	if (StrBoundChr(buf, '*', eoh))
		isAcceptingGzip = true;
	else
	if (StrBoundStr(buf, "gzip", eoh)) // XXX: codings are case-insensitive
		isAcceptingGzip = true;
	else
		isAcceptingGzip = false;
	return true;
}

bool ReqHdr::parseXLocWorld(const char *buf, const char *eoh) {
	return parseWorld(buf, eoh, theLocWorld);
}

// Logs the common header state followed by the request-specific items:
// URI host, object id, and If-Modified-Since seconds (in that order).
void ReqHdr::store(OLog &log) const {
	MsgHdr::store(log);
	log << theUri.host;
	log << theUri.oid;
	log << (int)theIms.sec();
}

// A request carries a body only if its object id is flagged post or put.
bool ReqHdr::expectBody() const {
	if (theUri.oid.post())
		return true;
	return theUri.oid.put();
}

// Tells whether the requester accepts the given content coding.
bool ReqHdr::acceptedEncoding(int coding) const {
	if (coding == codingIdentity) // always acceptable for now
		return true;
	return coding == codingGzip && isAcceptingGzip;
}

// Allocates the request parsing table and fills it with the common
// parsers, the request-only field parsers, and the request method parsers.
// NOTE(review): a previous table, if any, is not freed here; presumably
// this is called once, paired with Clean() -- confirm against callers.
void ReqHdr::Configure() {
	TheParsTab = new MsgHdrParsTab();
	MsgHdr::Configure(*TheParsTab);
	AddParser(hfpHost, &MsgHdr::parseHost, *TheParsTab);
	AddParser(hfpIMS, &MsgHdr::parseIms, *TheParsTab);
	AddParser(hfpXLocWorld, &MsgHdr::parseXLocWorld, *TheParsTab);
	AddParser(hfpAcceptEncoding, &MsgHdr::parseAcceptEncoding, *TheParsTab);

	// request method parsers use the same index/interface as field parsers
	AddParser(rlpGet, &MsgHdr::parseGetReqLine, *TheParsTab);
	AddParser(rlpHead, &MsgHdr::parseHeadReqLine, *TheParsTab);
	AddParser(rlpPost, &MsgHdr::parsePostReqLine, *TheParsTab);
	AddParser(rlpPut, &MsgHdr::parsePutReqLine, *TheParsTab);
}

// Frees the request parsing table and nulls the pointer, so calling
// Clean() again is harmless.
void ReqHdr::Clean() {
	delete TheParsTab;
	TheParsTab = 0;
}


/* RepHdr */

// Tells whether the status code belongs to the 1xx, 2xx, or 3xx classes.
bool RepHdr::PositiveStatusCode(int code) {
	/* 1xx: Informational - Request received, continuing process
	 * 2xx: Success - The action was successfully received, understood, and accepted
	 * 3xx: Redirection - Further action must be taken in order to complete the request
	 * 4xx: Client Error - The request contains bad syntax or cannot be fulfilled
	 * 5xx: Server Error - The server failed to fulfill an apparently valid request */
	if (code < 100)
		return false;
	return code < 400;
}

// Uses the reply parsing table; TheParsTab is dereferenced here, so
// RepHdr::Configure() must have been called before any RepHdr is created.
// Starts with an unknown status and no cookie collector.
RepHdr::RepHdr(): MsgHdr(*TheParsTab), theStatus(scUnknown), theCookies(0) {
}

// Resets the common header state and then the reply-specific members.
// The cookie collector pointer is dropped, not deleted.
void RepHdr::reset() {
	MsgHdr::reset();

	theStatus = scUnknown;
	theCookies = 0;

	theExpires = Time();
	theLMT = theExpires;
	theLocn = HttpUri();
	theServer = String();
}

// Sets the container that parseCookie() adds parsed cookies to.
// Passing null disables cookie collection. The pointer is only stored.
void RepHdr::collectCookies(HttpCookies *cookies) {
	theCookies = cookies;
}

// Tells whether this reply is expected to carry Polygraph-specific headers:
// it must be a reply with a body that is neither a 407, nor a 403, nor one
// of the redirects.
bool RepHdr::expectPolyHeaders() const {
	if (!expectBody())
		return false;
	if (theStatus == sc407_ProxyAuthRequired)
		return false;
	if (theStatus == sc403_Forbidden)
		return false;
	return !redirect();
}

// RFC 2616: All responses to the HEAD request method MUST NOT include
// a message-body. All 1xx , 204, and 304 responses MUST NOT include a
// message-body. All other responses do include a message-body.
bool RepHdr::expectBody() const {
	// note: we cannot handle the HEAD case here; the caller should
	const bool informational = 100 <= theStatus && theStatus < 200;
	const bool bodiless = informational ||
		theStatus == sc204_NoContent ||
		theStatus == sc304_NotModified;
	return !bodiless;
}

// Tells whether the status is one of the redirect codes handled here:
// 300, 302, 303, or 307.
bool RepHdr::redirect() const {
	if (theStatus == sc300_Choices)
		return true;
	if (theStatus == sc302_Found)
		return true;
	if (theStatus == sc303_Other)
		return true;
	return theStatus == sc307_TmpRedir;
}

// Picks the best available last-modification time: the parsed LMT if
// it is non-negative, else the parsed date if it is non-negative, else
// the current clock value.
Time RepHdr::calcLmt() const {
	const bool haveLmt = theLMT >= 0;
	if (haveLmt)
		return theLMT;

	const bool haveDate = theDate >= 0;
	if (haveDate)
		return theDate;

	return TheClock;
}

// Parses the status line: "HTTP/" (case-insensitive), a version, one
// separator character, and an integer status code.
bool RepHdr::parseRLine(const char *buf, const char *eorl) {
	const bool isHttp = strncasecmp("HTTP/", buf, 5) == 0;
	if (!isHttp)
		return false;

	buf += 5;
	if (buf >= eorl)
		return false;
	if (!parseHttpVersion(buf, eorl, theHttpVersion))
		return false;

	++buf; // step over the character that follows the version
	return isInt(buf, theStatus);
}

// Copies the field value [buf, eoh) as the server name and reports
// whether the stored string is non-empty.
bool RepHdr::parseServer(const char *buf, const char *eoh) {
	theServer = String(buf, eoh-buf);
	const bool nonEmpty = theServer.len() > 0;
	return nonEmpty;
}

bool RepHdr::parseLocation(const char *buf, const char *eoh) {
	return parseUri(buf, eoh, theLocn);
}

// Stores the result of HttpDateParse() as the last-modification time and
// reports whether the stored time is non-negative.
bool RepHdr::parseLMT(const char *buf, const char *eoh) {
	theLMT = HttpDateParse(buf, eoh - buf);
	const bool valid = theLMT >= 0;
	return valid;
}

// Stores the result of HttpDateParse() as the expiration time and
// reports whether the stored time is non-negative.
bool RepHdr::parseExpires(const char *buf, const char *eoh) {
	theExpires = HttpDateParse(buf, eoh - buf);
	const bool valid = theExpires >= 0;
	return valid;
}

// When a cookie collector is set, parses the field value as a cookie and
// adds it to the collector; a failed parse is reported via Should() and
// returns false. Without a collector the field is accepted but ignored.
bool RepHdr::parseCookie(const char *buf, const char *eoh) {
	if (!theCookies)
		return true; // nobody is collecting cookies

	HttpCookie *cookie = HttpCookieParse(buf, eoh - buf);
	if (!Should(cookie))
		return false;

	theCookies->add(cookie);
	return true;
}

// Logs the common header state followed by the reply-specific items:
// status, last-modification seconds, and expiration seconds (in that order).
void RepHdr::store(OLog &log) const {
	MsgHdr::store(log);
	log << theStatus;
	log << (int)theLMT.sec();
	log << (int)theExpires.sec();
}

// Allocates the reply parsing table and fills it with the common parsers
// plus the reply-only field parsers.
// NOTE(review): a previous table, if any, is not freed here; presumably
// this is called once, paired with Clean() -- confirm against callers.
void RepHdr::Configure() {
	TheParsTab = new MsgHdrParsTab();
	MsgHdr::Configure(*TheParsTab);
	AddParser(hfpLocation, &MsgHdr::parseLocation, *TheParsTab);
	AddParser(hfpServer, &MsgHdr::parseServer, *TheParsTab);
	AddParser(hfpLmt, &MsgHdr::parseLMT, *TheParsTab);
	AddParser(hfpExpires, &MsgHdr::parseExpires, *TheParsTab);
	AddParser(hfpSetCookie, &MsgHdr::parseCookie, *TheParsTab);
}

// Frees the reply parsing table and nulls the pointer, so calling
// Clean() again is harmless.
void RepHdr::Clean() {
	delete TheParsTab;
	TheParsTab = 0;
}


/* MsgHdrParsTab */

// Allocates an empty identifier table and an empty parser array.
MsgHdrParsTab::MsgHdrParsTab():
	ids(new StrIdentifier),
	parsers(new Array<MsgHdr::Parser*>) {
}

// Frees the identifier table, every stored parser, and the parser array.
MsgHdrParsTab::~MsgHdrParsTab() {
	delete ids;
	ids = 0;

	// the array holds heap-allocated parser pointers (see AddParser)
	while (parsers->count()) {
		MsgHdr::Parser *const stored = parsers->pop();
		delete stored;
	}

	delete parsers;
	parsers = 0;
}


// syntax highlighted by Code2HTML, v. 0.9.1