/* Web Polygraph http://www.web-polygraph.org/
* (C) 2003-2006 The Measurement Factory
* Licensed under the Apache License, Version 2.0 */
#include "base/polygraph.h"
#include "xstd/String.h"
#include "xstd/Clock.h"
#include "xstd/StrIdentifier.h"
#include "xstd/gadgets.h"
#include "base/OLog.h"
#include "base/AddrParsers.h"
#include "runtime/XactAbortCoord.h"
#include "runtime/HttpCookies.h"
#include "runtime/HttpDate.h"
#include "runtime/httpText.h"
#include "runtime/httpHdrs.h"
/* internal type to store static parsing info:
 * known field (and request method) names and their parser methods */
class MsgHdrParsTab {
    public:
        MsgHdrParsTab();
        ~MsgHdrParsTab();

    public:
        StrIdentifier *ids;              // registered names; yields numeric ids
        Array<MsgHdr::Parser*> *parsers; // parser for each id; entries are owned

    private:
        // The table owns the raw pointers above and frees them in the
        // destructor, so a member-wise copy would lead to a double delete.
        // Copying is disabled (declared but intentionally not defined).
        MsgHdrParsTab(const MsgHdrParsTab &);
        MsgHdrParsTab &operator =(const MsgHdrParsTab &);
};
// per-kind parsing tables; allocated by ReqHdr::Configure() and
// RepHdr::Configure(), released by the corresponding Clean()
MsgHdrParsTab *ReqHdr::TheParsTab = 0;
MsgHdrParsTab *RepHdr::TheParsTab = 0;
/* MsgHdr */
// remembers the parsing table to use and starts with a clean state
MsgHdr::MsgHdr(const MsgHdrParsTab &aTab):
    theParsTab(aTab)
{
    this->reset();
}
// forgets everything learned from a previously parsed message
void MsgHdr::reset() {
    // raw buffer bookkeeping and end-of-headers search state
    theSrchState = ssFirst;
    theSrchPtr = 0;
    theBufEnd = 0;
    theBufBeg = 0;
    theFields.reset();
    theHdrSize = 0;

    // generic HTTP properties
    theHttpVersion.reset();
    theDate = Time();
    theContSize = -1;
    theContType = ctUnknown;
    theTransferEncoding = tcNone;
    theConnectionKeepAlive = kaDefault;
    theChecksum.reset();
    isCachable = true;

    // values carried by X-* extension fields
    theGroupId.clear();
    theXactId.clear();
    theTarget = NetAddr();
    theRemWorld.reset();
    theAbortCoord.reset();
    thePhaseSyncPos = -1;

    // do not reset parsing tables
}
bool MsgHdr::markupContent() const {
return theContType == ctMarkup;
}
// whether a Content-Type field has been parsed since the last reset
bool MsgHdr::knownContentType() const {
    const bool isKnown = (ctUnknown != theContType);
    return isKnown;
}
bool MsgHdr::chunkedEncoding() const {
return theTransferEncoding == tcChunked;
}
// decides whether the connection should be kept open after this message
bool MsgHdr::persistentConnection() const {
    const bool oldProtocol = theHttpVersion <= HttpVersion(1,0);

    // 1.0: keep if explicitly told so
    if (oldProtocol)
        return theConnectionKeepAlive == kaYes;

    // 1.1: keep unless told otherwise
    return theConnectionKeepAlive != kaNo;
}
// note: buf does not have to be zero-terminated!
// Incrementally searches buf[0..sz) for the end of message headers.
// Returns false if the end of headers has not been seen yet; the caller
// may call again with the same buf start (asserted below) and a larger sz,
// and the search resumes where it stopped. Once the end of headers is
// found, records the header size, parses known fields, and returns true.
bool MsgHdr::parse(const char *buf, Size sz) {
if (!theBufBeg) { // have not started the search yet
theBufBeg = theSrchPtr = buf;
Assert(theSrchState == ssFirst);
} else { // continue search
Assert(theBufBeg == buf);
Assert(theSrchState != ssFound);
}
theBufEnd = buf + sz; // to be refined later
// two-state scan: ssFirst looks for the LF that ends the current line;
// ssSkip inspects what follows that LF
while (theSrchPtr < theBufEnd && theSrchState != ssFound) {
// search for LF
if (theSrchState == ssFirst) {
do {
if (*theSrchPtr++ == '\n') {
theSrchState = ssSkip;
break;
}
} while (theSrchPtr < theBufEnd);
}
// LF after skipping optional CRs means end-of-headers
while (theSrchState == ssSkip && theSrchPtr < theBufEnd) {
if (*theSrchPtr == '\n') {
theSrchState = ssFound;
} else
if (*theSrchPtr != '\r') {
// remember where this line starts so that parseFields()
// does not have to search for field boundaries again
theFields.append(theSrchPtr); // start of a header!
theSrchState = ssFirst;
}
++theSrchPtr;
}
}
// ran out of data before seeing the empty line
if (theSrchState != ssFound)
return false;
// found end-of-headers!
// theSrchPtr is now just past the LF of the empty line
theBufEnd = theSrchPtr;
theHdrSize = theBufEnd - theBufBeg;
// now parse known fields
// luckily, we already know field starts!
parseFields();
return true;
}
// Parses the first (request or status) line and then every known field.
// Field starts were recorded in theFields by parse(). Fields are visited
// last-to-first so that the start of the following field can serve as
// the (approximate) end of the current one.
void MsgHdr::parseFields() {
    const char *eoh = theBufEnd;

    // skip end-of-headers CRLF
    while (theBufBeg < eoh && eoh[-1] == '\n') --eoh;
    while (theBufBeg < eoh && eoh[-1] == '\r') --eoh;

    // the first line ends where the first field starts (if any)
    parseRLine(theBufBeg, theFields.count() ? theFields[0] : eoh);

    for (int i = theFields.count()-1; i >= 0; --i) {
        const char *hdr = theFields[i];
        const int len = eoh-hdr; // approximate (includes crlfs)
        const int id = theParsTab.ids->lookup(hdr, len);
        if (id > 0) {
            const char *val = hdr + theParsTab.ids->string(id).len();
            // The buffer is not zero-terminated and CR/LF count as space:
            // stop at the end of this field so that an empty value does not
            // make us read the next field (or bytes beyond the headers).
            // The cast keeps isspace() defined for negative char values.
            while (val < eoh && isspace(static_cast<unsigned char>(*val)))
                ++val;
            Parser p = *theParsTab.parsers->item(id);
            (this->*p)(val, eoh);
        }
        eoh = hdr; // the previous field ends where this one starts
    }
}
// Parses "<int>.<int>" at beg. On success, stores the version in v,
// advances beg past the second number, and returns true. On failure,
// neither beg nor v is changed.
bool MsgHdr::parseHttpVersion(const char *&beg, const char *end, HttpVersion &v) {
    int verMajor = -1;
    int verMinor = -1;
    const char *cur = 0;

    if (!isInt(beg, verMajor, &cur))
        return false;

    // need a dot with at least one more character after it
    if (!(cur+1 < end) || *cur != '.')
        return false;

    if (!isInt(cur+1, verMinor, &cur))
        return false;

    v = HttpVersion(verMajor, verMinor);
    beg = cur;
    return true;
}
// Parses a "<hex>.<hex>:<hex>" identifier at buf. The last number is
// parsed with buf as the output position. Returns true only if all three
// numbers were parsed and the resulting id does not compare equal to 0.
bool MsgHdr::parseUniqId(const char *&buf, const char *, UniqId &id) {
    int first = 0;
    int second = 0;
    int third = 0;
    const char *cur = 0;

    if (!isInt(buf, first, &cur, 16) || *cur != '.')
        return false;

    if (!isInt(cur+1, second, &cur, 16) || *cur != ':')
        return false;

    if (!isInt(cur+1, third, &buf, 16))
        return false;

    id = UniqId(first, second, third);
    return id != 0;
}
// Parses "<uniq-id> <wss>/<size> <hotPos>" and, only if every part is
// present, copies the values into world.
bool MsgHdr::parseWorld(const char *buf, const char *eoh, ObjWorld &world) {
    UniqId id;
    if (!parseUniqId(buf, eoh, id))
        return false;
    if (*buf != ' ')
        return false;

    int wss = -1;
    int size = -1;
    int hotPos = -1;
    const char *cur = buf;

    if (!isInt(cur+1, wss, &cur) || *cur != '/')
        return false;
    if (!isInt(cur+1, size, &cur) || *cur != ' ')
        return false;
    if (!isInt(cur+1, hotPos))
        return false;

    world.id(id);
    world.size(size);
    world.wss(wss);
    world.hotPos(hotPos);
    return true;
}
// Wrapper around SkipHostInUri(): start is advanced only when that
// call returns a non-null position.
bool MsgHdr::ParseHostInUri(const char *&start, const char *eorl, NetAddr &host) {
    const char *const afterHost = SkipHostInUri(start, eorl, host);
    if (!afterHost)
        return false;

    start = afterHost;
    return true;
}
// Parses a URI starting at buf and not extending beyond end.
// Fills uri.host (when the URI does not start with '/'), uri.pathBuf and
// uri.pathLen, and the object id parts found after the 'w', 't', and '_'
// tags. On return, buf points to the first whitespace after the URI (or
// to end). Always returns true; a URI lacking any of the object id parts
// is recorded as a foreign URL.
bool MsgHdr::parseUri(const char *&buf, const char *end, HttpUri &uri) {
const char *uriStart = buf;
// see if there is a protocol://host prefix
// (the result is ignored; buf stays put if there is no such prefix)
if (*buf != '/')
ParseHostInUri(buf, end, uri.host);
uri.pathBuf = buf; // includes leading '/'
// user-specified url_prefix may contain world-id tag;
// search for the first tag that is followed by a valid wid
// note: buf is moved past each candidate 'w' even if no valid wid follows
while (const char *wid = StrBoundChr(buf, 'w', end)) {
if (parseUniqId(buf = wid+1, end, uri.oid.world()))
break;
}
// object type: hexadecimal number after the next 't'
if (const char *tid = StrBoundChr(buf, 't', end))
isInt(tid + 1, uri.oid.type(), &buf, 16);
// object name: hexadecimal number after the next '_'
if (const char *oid = StrBoundChr(buf, '_', end))
isInt(oid + 1, uri.oid.name(), &buf, 16);
// find the end of the Uri
while (buf < end && !isspace(*buf))
++buf;
uri.pathLen = buf - uri.pathBuf;
// without a complete object id, keep the URI text (host prefix included)
if (!uri.oid.world() || uri.oid.type() < 0 || uri.oid.name() < 0)
uri.oid.foreignUrl(String(uriStart, buf - uriStart));
return true;
}
// Content-Length: stores the result of xatoi(buf, -1);
// a negative stored size is reported as a failure
bool MsgHdr::parseContLen(const char *buf, const char *) {
    theContSize = xatoi(buf, -1);
    const bool valid = theContSize >= 0;
    return valid;
}
// Content-MD5: base64-decodes the value into the checksum buffer and
// marks the checksum as set only if the decoded size matches exactly
bool MsgHdr::parseContMd5(const char *buf, const char *eoh) {
    const bool complete =
        DecodeBase64(buf, eoh - buf, theChecksum.buf(), theChecksum.size()) == theChecksum.size();
    if (!complete)
        return false;

    theChecksum.set(true);
    return true;
}
// Content-Type: classifies the value as markup or "other".
// Markup is "text/" followed by a subtype that looks like "??ml" (e.g.,
// html), "?ml" (e.g., xml), or starts with "css". Always returns true.
bool MsgHdr::parseContType(const char *buf, const char *eoh) {
    theContType = ctOther; // default

    // the buffer is not zero-terminated, so never compare beyond eoh
    if (eoh - buf < 5 || strncasecmp(buf, "text/", 5) != 0)
        return true;

    const char *const sub = buf + 5;
    const long left = eoh - sub; // bytes available for the subtype

    if (left >= 4 && strncasecmp(sub+2, "ml", 2) == 0)
        theContType = ctMarkup;
    else
    // only three bytes are inspected here; requiring four missed a
    // three-letter subtype ending exactly at eoh
    if (left >= 3 && strncasecmp(sub+1, "ml", 2) == 0)
        theContType = ctMarkup;
    else
    if (left >= 3 && strncasecmp(sub, "css", 3) == 0)
        theContType = ctMarkup;

    return true;
}
// Date: stores the parsed time; succeeds if the stored time is >= 0
bool MsgHdr::parseDate(const char *buf, const char *eoh) {
    theDate = HttpDateParse(buf, eoh - buf);
    const bool known = theDate >= 0;
    return known;
}
// Pragma: only a value that starts with "no-cache" is recognized
bool MsgHdr::parsePragma(const char *buf, const char *) {
    const bool noCache = strncasecmp("no-cache", buf, 8) == 0;
    if (noCache)
        isCachable = false;
    return noCache;
}
// Cache-Control: only a value that starts with "no-cache" is recognized
bool MsgHdr::parseCControl(const char *buf, const char *) {
    if (strncasecmp("no-cache", buf, 8) != 0)
        return false;

    isCachable = false;
    return true;
}
// X-Xact: "<group-id> <xact-id>"
bool MsgHdr::parseXXact(const char *buf, const char *eoh) {
    if (!parseUniqId(buf, eoh, theGroupId))
        return false;

    if (*buf != ' ')
        return false;

    ++buf; // step over the separating space
    return parseUniqId(buf, eoh, theXactId);
}
bool MsgHdr::parseXRemWorld(const char *buf, const char *eoh) {
return parseWorld(buf, eoh, theRemWorld);
}
// X-Abort: "<whether> <where>"; both numbers must be present and non-zero
bool MsgHdr::parseXAbort(const char *buf, const char *) {
    int whether = 0;
    int where = 0;
    const char *cur = 0;

    if (!isInt(buf, whether, &cur) || *cur != ' ')
        return false;
    if (!isInt(cur+1, where))
        return false;
    if (!whether || !where)
        return false;

    theAbortCoord.configure(whether, where);
    return true;
}
// X-Phase-Sync-Pos: stores the result of xatoi(buf, 0); never fails
bool MsgHdr::parseXPhaseSyncPos(const char *buf, const char *) {
    const int pos = xatoi(buf, 0);
    thePhaseSyncPos = pos;
    return true;
}
// X-Target: network address parsed into theTarget
bool MsgHdr::parseXTarget(const char *buf, const char *eoh) {
    const bool parsed = ParseNetAddr(buf, eoh, theTarget);
    return parsed;
}
/* XXX: Connection and other headers may have a _list_ of options */
// only the beginning of the value is examined: "close" or "keep"
bool MsgHdr::parseConnection(const char *buf, const char *) {
    if (strncasecmp("close", buf, 5) == 0) {
        theConnectionKeepAlive = kaNo;
        return true;
    }

    if (strncasecmp("keep", buf, 4) == 0) {
        theConnectionKeepAlive = kaYes;
        return true;
    }

    return false;
}
// Transfer-Encoding: "chunked" and "identity" prefixes are recognized;
// anything else is remembered as tcOther. Never fails.
bool MsgHdr::parseTransferEncoding(const char *buf, const char *) {
    if (strncasecmp("chunked", buf, 7) == 0) {
        theTransferEncoding = tcChunked;
        return true;
    }

    if (strncasecmp("identity", buf, 8) == 0) {
        theTransferEncoding = tcIdentity;
        return true;
    }

    theTransferEncoding = tcOther;
    return true;
}
// adds definitions common to replies and requests
// each call registers one field name together with the method that parses
// its value; called by ReqHdr::Configure() and RepHdr::Configure()
void MsgHdr::Configure(MsgHdrParsTab &tab) {
AddParser(hfpDate, &MsgHdr::parseDate, tab);
AddParser(hfpContLength, &MsgHdr::parseContLen, tab);
AddParser(hfpContMd5, &MsgHdr::parseContMd5, tab);
AddParser(hfpContType, &MsgHdr::parseContType, tab);
AddParser(hfpCacheControl, &MsgHdr::parseCControl, tab);
AddParser(hfpConnection, &MsgHdr::parseConnection, tab);
AddParser(hfpPragma, &MsgHdr::parsePragma, tab);
// Proxy-Connection shares the Connection parser
AddParser(hfpProxyConnection, &MsgHdr::parseConnection, tab);
AddParser(hfpTransferEncoding, &MsgHdr::parseTransferEncoding, tab);
AddParser(hfpXXact, &MsgHdr::parseXXact, tab);
AddParser(hfpXRemWorld, &MsgHdr::parseXRemWorld, tab);
AddParser(hfpXAbort, &MsgHdr::parseXAbort, tab);
AddParser(hfpXPhaseSyncPos, &MsgHdr::parseXPhaseSyncPos, tab);
AddParser(hfpXTarget, &MsgHdr::parseXTarget, tab);
}
int MsgHdr::AddParser(const String &field, Parser parser, MsgHdrParsTab &where) {
Assert(field);
const String trimmedField = isspace(field.last()) ? field(0, field.len()-1) : field;
Assert(trimmedField);
const int id = where.ids->add(trimmedField);
where.parsers->put(new Parser(parser), id);
return id;
}
// writes parsed header properties to the log, one value at a time;
// the order of the values is a part of the log format
void MsgHdr::store(OLog &log) const {
    log << theHdrSize;
    log << (int)theDate.sec();
    log << theContSize;
    // log << theChecksum;
    log << theGroupId;
    log << theXactId;
    log << theTarget;
    log << theHttpVersion.vMinor(); // XXX: log major too
    log << (int)theConnectionKeepAlive;
    log << isCachable;
    // XXX: not stored or loaded: theTransferEncoding, theContType
}
// these should never be called
// (MsgHdr-level placeholders for the parsers that ReqHdr and RepHdr define)
inline bool dontCallMe() {
    Assert(0);
    return false;
}

/* request-side parsers */
bool MsgHdr::parseGetReqLine(const char *, const char *) {
    return dontCallMe();
}
bool MsgHdr::parseHeadReqLine(const char *, const char *) {
    return dontCallMe();
}
bool MsgHdr::parsePostReqLine(const char *, const char *) {
    return dontCallMe();
}
bool MsgHdr::parsePutReqLine(const char *, const char *) {
    return dontCallMe();
}
bool MsgHdr::parseHost(const char *, const char *) {
    return dontCallMe();
}

/* reply-side parsers */
bool MsgHdr::parseServer(const char *, const char *) {
    return dontCallMe();
}
bool MsgHdr::parseLocation(const char *, const char *) {
    return dontCallMe();
}
bool MsgHdr::parseLMT(const char *, const char *) {
    return dontCallMe();
}
bool MsgHdr::parseExpires(const char *, const char *) {
    return dontCallMe();
}

/* request-side parsers, continued */
bool MsgHdr::parseIms(const char *, const char *) {
    return dontCallMe();
}
bool MsgHdr::parseAcceptEncoding(const char *, const char *) {
    return dontCallMe();
}
bool MsgHdr::parseXLocWorld(const char *, const char *) {
    return dontCallMe();
}

/* reply-side parsers, continued */
bool MsgHdr::parseCookie(const char *, const char *) {
    return dontCallMe();
}
/* ReqHdr */
// uses the request parsing table; ReqHdr::Configure() sets that table
ReqHdr::ReqHdr():
    MsgHdr(*TheParsTab),
    isHealthCheck(false),
    isAcceptingGzip(false)
{
}
// clears common state first, then everything request-specific
void ReqHdr::reset() {
    MsgHdr::reset();

    isAcceptingGzip = false;
    isHealthCheck = false;
    theLocWorld.reset();
    theIms = Time();
    theUri = HttpUri();
}
// Parses the request line: looks the beginning of the line up in the
// parsing table (request methods are registered there alongside field
// names) and hands the rest of the line to the matching parser.
// Returns false if nothing known is found at the start of the line.
bool ReqHdr::parseRLine(const char *buf, const char *eorl) {
    const int id = theParsTab.ids->lookup(buf, eorl - buf);
    if (id <= 0)
        return false;

    buf += theParsTab.ids->string(id).len();

    // The buffer is not zero-terminated and CR/LF count as space: do not
    // skip beyond the end of the request line. The cast keeps isspace()
    // defined for negative char values.
    while (buf < eorl && isspace(static_cast<unsigned char>(*buf)))
        ++buf;

    Parser p = *theParsTab.parsers->item(id);
    return (this->*p)(buf, eorl);
}
// Method-independent part of request line parsing: the URI and, if
// present, the protocol version. buf points to the URI. Always returns true.
bool ReqHdr::parseAnyReqLine(const char *buf, const char *eorl) {
    // a "well-known" health check uri
    static const String health = "/health";
    isHealthCheck = health.casePrefixOf(buf, eorl-buf);

    parseUri(buf, eorl, theUri);

    const char *const gap = StrBoundChr(buf, ' ', eorl);
    if (gap) {
        // optimization: not checking for "HTTP/" match
        const char *version = gap + 6;
        if (version < eorl)
            parseHttpVersion(version, eorl, theHttpVersion);
    }

    return true;
}
// GET request line: parses the rest of the line and flags the object id
bool ReqHdr::parseGetReqLine(const char *buf, const char *eorl) {
    if (!parseAnyReqLine(buf, eorl))
        return false;

    theUri.oid.get(true);
    return true;
}
// HEAD request line: parses the rest of the line and flags the object id
bool ReqHdr::parseHeadReqLine(const char *buf, const char *eorl) {
    if (!parseAnyReqLine(buf, eorl))
        return false;

    theUri.oid.head(true);
    return true;
}
// POST request line: parses the rest of the line and flags the object id
bool ReqHdr::parsePostReqLine(const char *buf, const char *eorl) {
    if (!parseAnyReqLine(buf, eorl))
        return false;

    theUri.oid.post(true);
    return true;
}
// PUT request line: parses the rest of the line and flags the object id
bool ReqHdr::parsePutReqLine(const char *buf, const char *eorl) {
    if (!parseAnyReqLine(buf, eorl))
        return false;

    theUri.oid.put(true);
    return true;
}
// Host: network address parsed into the host part of the request URI
bool ReqHdr::parseHost(const char *buf, const char *eoh) {
    const bool parsed = ParseNetAddr(buf, eoh, theUri.host);
    return parsed;
}
// If-Modified-Since: stores the parsed time; succeeds if it is >= 0
bool ReqHdr::parseIms(const char *buf, const char *eoh) {
    theIms = HttpDateParse(buf, eoh - buf);
    const bool known = theIms >= 0;
    return known;
}
// Accept-Encoding: gzip is considered acceptable if the value has a
// '*' or a "gzip" substring. Never fails.
bool ReqHdr::parseAcceptEncoding(const char *buf, const char *eoh) {
    // XXX: these checks ignore "q=0" preferences
    if (StrBoundChr(buf, '*', eoh))
        isAcceptingGzip = true;
    else
    if (StrBoundStr(buf, "gzip", eoh)) // XXX: codings are case-insensitive
        isAcceptingGzip = true;
    else
        isAcceptingGzip = false;

    return true;
}
bool ReqHdr::parseXLocWorld(const char *buf, const char *eoh) {
return parseWorld(buf, eoh, theLocWorld);
}
// logs common properties followed by request-specific ones
void ReqHdr::store(OLog &log) const {
    MsgHdr::store(log);

    log << theUri.host;
    log << theUri.oid;
    log << (int)theIms.sec();
}
// a body is expected if the request line was a POST or a PUT
bool ReqHdr::expectBody() const {
    if (theUri.oid.post())
        return true;

    return theUri.oid.put();
}
// whether a body in the given content coding may be sent in response
bool ReqHdr::acceptedEncoding(int coding) const {
    // always acceptable for now
    if (coding == codingIdentity)
        return true;

    return coding == codingGzip && isAcceptingGzip;
}
void ReqHdr::Configure() {
TheParsTab = new MsgHdrParsTab();
MsgHdr::Configure(*TheParsTab);
AddParser(hfpHost, &MsgHdr::parseHost, *TheParsTab);
AddParser(hfpIMS, &MsgHdr::parseIms, *TheParsTab);
AddParser(hfpXLocWorld, &MsgHdr::parseXLocWorld, *TheParsTab);
AddParser(hfpAcceptEncoding, &MsgHdr::parseAcceptEncoding, *TheParsTab);
// request method parsers use the same index/interface as field parsers
AddParser(rlpGet, &MsgHdr::parseGetReqLine, *TheParsTab);
AddParser(rlpHead, &MsgHdr::parseHeadReqLine, *TheParsTab);
AddParser(rlpPost, &MsgHdr::parsePostReqLine, *TheParsTab);
AddParser(rlpPut, &MsgHdr::parsePutReqLine, *TheParsTab);
}
void ReqHdr::Clean() {
delete TheParsTab;
TheParsTab = 0;
}
/* RepHdr */
// 1xx, 2xx, and 3xx status codes are considered positive
bool RepHdr::PositiveStatusCode(int code) {
    /* 1xx: Informational - Request received, continuing process
     * 2xx: Success - The action was successfully received, understood, and accepted
     * 3xx: Redirection - Further action must be taken in order to complete the request
     * 4xx: Client Error - The request contains bad syntax or cannot be fulfilled
     * 5xx: Server Error - The server failed to fulfill an apparently valid request */
    if (code < 100)
        return false;

    return code < 400;
}
// uses the reply parsing table; RepHdr::Configure() sets that table
RepHdr::RepHdr():
    MsgHdr(*TheParsTab),
    theStatus(scUnknown),
    theCookies(0)
{
}
// clears common state first, then everything reply-specific
void RepHdr::reset() {
    MsgHdr::reset();

    theStatus = scUnknown;
    theServer = String();
    theLocn = HttpUri();
    theExpires = Time();
    theLMT = theExpires;
    theCookies = 0; // only the pointer is dropped here
}
// parsed Set-Cookie values will be added to the given collection;
// a nil collection disables cookie parsing
void RepHdr::collectCookies(HttpCookies *cookies) {
    this->theCookies = cookies;
}
// true for responses that expect a body, except 407, 403, and redirects
bool RepHdr::expectPolyHeaders() const {
    if (!expectBody())
        return false;

    if (theStatus == sc407_ProxyAuthRequired)
        return false;

    if (theStatus == sc403_Forbidden)
        return false;

    return !redirect();
}
// RFC 2616: All responses to the HEAD request method MUST NOT include
// a message-body. All 1xx , 204, and 304 responses MUST NOT include a
// message-body. All other responses do include a message-body.
bool RepHdr::expectBody() const {
    // note: we cannot handle the HEAD case here; the caller should
    const bool informational = 100 <= theStatus && theStatus < 200;
    if (informational)
        return false;

    if (theStatus == sc204_NoContent)
        return false;

    if (theStatus == sc304_NotModified)
        return false;

    return true;
}
// whether the status code is one of the redirection codes we recognize
bool RepHdr::redirect() const {
    if (theStatus == sc300_Choices)
        return true;
    if (theStatus == sc302_Found)
        return true;
    if (theStatus == sc303_Other)
        return true;
    return theStatus == sc307_TmpRedir;
}
// picks the time to use as the last modification time: the parsed
// Last-Modified value, or else the parsed Date value, or else TheClock
Time RepHdr::calcLmt() const {
    const bool haveLmt = theLMT >= 0;
    if (haveLmt)
        return theLMT;

    const bool haveDate = theDate >= 0;
    if (haveDate)
        return theDate;

    return TheClock;
}
// Parses the status line: "HTTP/<major>.<minor> <status> ...".
// Stores the protocol version and the status code. Returns false if
// the line does not start with "HTTP/", if the version is malformed, or
// if no status code follows the version.
bool RepHdr::parseRLine(const char *buf, const char *eorl) {
    // the buffer is not zero-terminated, so never compare beyond eorl
    if (eorl - buf < 5 || strncasecmp("HTTP/", buf, 5) != 0)
        return false;
    buf += 5;

    if (buf >= eorl || !parseHttpVersion(buf, eorl, theHttpVersion))
        return false;

    buf += 1; // skip the character that separates version from status

    // do not mistake whatever follows the status line for a status code
    if (buf >= eorl)
        return false;

    return isInt(buf, theStatus);
}
// Server: keeps a copy of the value; an empty value is a failure
bool RepHdr::parseServer(const char *buf, const char *eoh) {
    theServer = String(buf, eoh-buf);
    const bool nonEmpty = theServer.len() > 0;
    return nonEmpty;
}
bool RepHdr::parseLocation(const char *buf, const char *eoh) {
return parseUri(buf, eoh, theLocn);
}
// Last-Modified: stores the parsed time; succeeds if it is >= 0
bool RepHdr::parseLMT(const char *buf, const char *eoh) {
    theLMT = HttpDateParse(buf, eoh - buf);
    const bool known = theLMT >= 0;
    return known;
}
// Expires: stores the parsed time; succeeds if it is >= 0
bool RepHdr::parseExpires(const char *buf, const char *eoh) {
    theExpires = HttpDateParse(buf, eoh - buf);
    const bool known = theExpires >= 0;
    return known;
}
// Set-Cookie: parses the value and adds the cookie to the collection set
// by collectCookies(); the field is skipped if there is no collection
bool RepHdr::parseCookie(const char *buf, const char *eoh) {
    if (!theCookies)
        return true; // nobody is collecting cookies

    HttpCookie *cookie = HttpCookieParse(buf, eoh - buf);
    if (!Should(cookie))
        return false;

    theCookies->add(cookie);
    return true;
}
// logs common properties followed by reply-specific ones
void RepHdr::store(OLog &log) const {
    MsgHdr::store(log);

    log << theStatus;
    log << (int)theLMT.sec();
    log << (int)theExpires.sec();
}
void RepHdr::Configure() {
TheParsTab = new MsgHdrParsTab();
MsgHdr::Configure(*TheParsTab);
AddParser(hfpLocation, &MsgHdr::parseLocation, *TheParsTab);
AddParser(hfpServer, &MsgHdr::parseServer, *TheParsTab);
AddParser(hfpLmt, &MsgHdr::parseLMT, *TheParsTab);
AddParser(hfpExpires, &MsgHdr::parseExpires, *TheParsTab);
AddParser(hfpSetCookie, &MsgHdr::parseCookie, *TheParsTab);
}
void RepHdr::Clean() {
delete TheParsTab;
TheParsTab = 0;
}
/* MsgHdrParsTab */
// starts with an empty name index and an empty parser array
MsgHdrParsTab::MsgHdrParsTab():
    ids(new StrIdentifier),
    parsers(new Array<MsgHdr::Parser*>)
{
}
// frees the name index, every stored parser, and the parser array itself
MsgHdrParsTab::~MsgHdrParsTab() {
    delete ids;
    ids = 0;

    // the array holds pointers allocated by MsgHdr::AddParser()
    while (parsers->count()) {
        MsgHdr::Parser *const stored = parsers->pop();
        delete stored;
    }

    delete parsers;
    parsers = 0;
}
// syntax highlighted by Code2HTML, v. 0.9.1