/* Web Polygraph http://www.web-polygraph.org/
* (C) 2003-2006 The Measurement Factory
* Licensed under the Apache License, Version 2.0 */
#include "base/polygraph.h"
#include <ctype.h>
#include <limits.h>
#include "xstd/h/sstream.h"
#include "xstd/h/iomanip.h"
#include "xstd/Clock.h"
#include "xstd/gadgets.h"
#include "base/polyLogCats.h"
#include "base/RndPermut.h"
#include "base/AddrParsers.h"
#include "runtime/Connection.h"
#include "runtime/AddrMap.h"
#include "runtime/HostMap.h"
#include "runtime/PubWorld.h"
#include "runtime/HttpDate.h"
#include "runtime/StatPhase.h"
#include "runtime/StatPhaseMgr.h"
#include "runtime/StatPhaseSync.h"
#include "runtime/SharedOpts.h"
#include "runtime/PageInfo.h"
#include "runtime/ConnMgr.h"
#include "runtime/HttpCookies.h"
#include "runtime/ErrorMgr.h"
#include "runtime/LogComment.h"
#include "runtime/httpText.h"
#include "runtime/polyBcastChannels.h"
#include "runtime/polyErrors.h"
#include "runtime/globals.h"
#include "csm/ContentCfg.h"
#include "csm/ContentMgr.h"
#include "csm/oid2Url.h"
#include "client/CltOpts.h"
#include "client/Client.h"
#include "client/ServerRep.h"
#include "client/PrivCache.h"
#include "client/CltDataFilterRegistry.h"
#include "client/ChunkedCodingParser.h"
#include "client/AnyBodyParser.h"
#include "client/MarkupBodyParser.h"
#include "client/UriScriptBodyParser.h"
#include "client/ParseBuffer.h"
#include "client/RegExGroups.h"
#include "client/MembershipMap.h"
#include "client/AclGroup.h"
#include "client/CltCfg.h"
#include "client/PipelinedCxm.h"
#include "client/CltXact.h"
// File-local farm of PipelinedCxm managers (an object pool, judging by the
// get()/put() usage in this file): getPipeline() takes managers from it and
// reset() puts them back once their use level drops to zero.
static ObjFarm<PipelinedCxm> ThePipelineMgrs;
// Creates an idle transaction: no owner, no page, no connection manager.
// theSelfMgr is bound to this transaction; every other field receives its
// default from reset().
CltXact::CltXact():
    theOwner(0),
    thePage(0),
    theMgr(0),
    theSelfMgr(*this)
{
    // thePage and theMgr must be zeroed above because reset() inspects them
    CltXact::reset();
}
// Returns the transaction to its initial state so that the object can be
// reused: resets the Xaction base, abandons the page (if any), gives up the
// connection manager, and clears all per-request fields.
void CltXact::reset() {
    Xaction::reset();

    theOwner = 0;

    // stop contributing to the page we were part of, if any
    if (thePage)
        PageInfo::Abandon(thePage);

    // give up our manager; a farmed pipeline manager goes back to the farm
    // when its use level reaches zero (or below)
    if (theMgr) {
        const bool farmedMgr = theMgr != &theSelfMgr;
        if (farmedMgr && --theMgr->useLevel <= 0)
            ThePipelineMgrs.put((PipelinedCxm*)theMgr);
        theMgr = 0;
    }

    // target and content information
    theSrvRep = 0;
    theContentCfg = 0;
    theBodyParser = 0;
    theNextHop = NetAddr();

    // reply and request details
    theRepHdr.reset();
    theOlcTimes.reset();
    theReqBodySize = -1;
    thePeerState = peerUnknown;
    theCred.reset();
    theSavedRepHeader.reset();

    // bookkeeping
    theCause = 0;
    theChildCount = 0;
    doRetry = false;
    theLogCat = lgcCltSide;
}
// Returns the agent that owns this transaction (set by exec(), cleared by
// reset()); may be null.
Agent *CltXact::owner() {
    return theOwner;
}
// Returns the identifier of this transaction's request.
const UniqId &CltXact::reqId() const {
    const UniqId &id = theId;
    return id;
}
// Records the current clock reading as the moment this transaction was
// queued.
void CltXact::enqueue() {
    theEnqueTime = TheClock;
}
// Returns the manager other transactions should use to pipeline behind this
// one, or null if the connection cannot be pipelined. If this transaction
// is still managed by its private theSelfMgr, read control is handed over
// to a pipeline manager taken from the farm first.
CltXactMgr *CltXact::getPipeline() {
    Assert(theMgr);

    if (!theConn->pipelineable())
        return 0;

    // already pipelining: reuse the existing pipeline manager
    if (theMgr != &theSelfMgr)
        return theMgr;

    // not pipelining yet: create a pipeline manager
    ThePipelineMgrs.limit(1024); // magic, no good way to estimate
    Assert(theState == stBodyWaiting);
    theSelfMgr.releaseReadControl(this);

    PipelinedCxm *pipeMgr = ThePipelineMgrs.get();
    ++pipeMgr->useLevel;
    pipeMgr->assumeReadControl(this, theMgr);
    theMgr = pipeMgr;

    return theMgr;
}
// Attaches this transaction to an existing pipeline: adopts aMgr as the
// manager (bumping its use level) and starts using the manager's
// connection.
void CltXact::pipeline(CltXactMgr *aMgr) {
    Assert(theMgr != aMgr); // or we will overincrement useLevel

    theMgr = aMgr;
    ++aMgr->useLevel;

    theConn = aMgr->conn();
    Assert(theConn);
    theConn->startUse();
}
// Associates this transaction with a page. Expects a non-null page and no
// page set so far; otherwise complains via Should() and changes nothing.
void CltXact::page(PageInfo *aPage) {
    if (!Should(!thePage && aPage))
        return;

    thePage = PageInfo::Use(aPage);
}
// Returns the page this transaction contributes to; null if none.
PageInfo *CltXact::page() {
    return thePage;
}
// Starts executing the transaction for anOwner on aConn: picks a manager
// (the private one unless pipeline() already set another), looks up the
// server representative and content configuration for domestic URLs,
// selects abort coordinates, enters the initial waiting state, and hands
// control to the manager.
void CltXact::exec(Client *anOwner, Connection *aConn) {
    Assert(theOid);
    Assert(anOwner);
    theOwner = anOwner;

    // not pipelined by somebody else: manage ourselves
    if (!theMgr)
        theMgr = &theSelfMgr;

    start(aConn);

    // only non-foreign URLs map to a configured server and content type
    if (!theOid.foreignUrl()) {
        theSrvRep = TheHostMap->serverRepAt(theOid.viserv());
        theContentCfg = TheContentMgr.get(theOid.type());
    }

    theOwner->cfg()->selectAbortCoord(theAbortCoord);

    Should (!theConn->tunneling() || theConn->sslConfigured());

    // a tunnel without active SSL has to be established first
    // XXX: not last call
    if (theConn->tunneling() && !theConn->sslActive())
        newState(stTunnelConnWaiting);
    else
        newState(stConnWaiting);

    theMgr->control(this);
}
// Ends this transaction. A false (zero) err means success; anything else is
// the failure reason. In order: follows a redirect reply, verifies the body
// checksum, updates the owner's private cache, tells the connection manager
// about the outcome, folds the reply into the page statistics, runs the base
// Xaction::finish(), returns the body parser to its farm, and finally
// notifies the owner via noteXactDone().
void CltXact::finish(Error err) {
// a successfully received redirect reply is followed first
if (!err && theRepHdr.redirect())
redirect();
// special actions for objects with bodies
if (!theOid.head() && theBodyParser && theBodyParser->used()) {
// check MD5
if (!err && !theOid.aborted() && theRepHdr.theChecksum.set()) {
theCheckAlg.final();
if (!theCheckAlg.sum().equal(theRepHdr.theChecksum)) {
// details are logged only if ReportError() asks for them
if (ReportError(errChecksum)) {
theRepHdr.theChecksum.print(Comment << "MD5 expected: ") << endc;
theCheckAlg.sum().print(Comment << "MD5 received: ") << endc;
if (TheOpts.theDumpFlags(dumpErr, dumpAny))
printMsg(theConn->theRdBuf);
}
// a checksum mismatch turns a successful transaction into a failed one
err = errOther;
}
}
// cache
if (!err && theOid.cachable()) {
if (PrivCache *cache = theOwner->privCache())
cache->storeOid(theOid);
}
}
// a failed transaction removes its object from the private cache
if (err && theOwner->privCache())
theOwner->privCache()->purgeOid(theOid);
// tell the manager how we ended; errPipelineAbort is what controlledAbort()
// passes in, and the manager is not notified in that case
if (err) {
if (err != errPipelineAbort)
theMgr->noteAbort(this);
} else {
theMgr->noteDone(this);
}
// account for this reply in the page; the last user of the page also
// computes the page lifetime and broadcasts the end-of-page event
if (thePage) {
thePage->size += theRepSize.actual();
if (thePage->loneUser() && Should(thePage->start >= 0)) {
thePage->lifeTime = TheClock - thePage->start;
Broadcast(ThePageEndChannel, thePage);
}
PageInfo::Abandon(thePage);
}
Xaction::finish(err);
// return the body parser to the farm it came from
if (theBodyParser) {
theBodyParser->farm().put(theBodyParser);
theBodyParser = 0;
}
// last step: the owner learns that we are done
theOwner->noteXactDone(this);
}
// Reacts to an abort notification by moving straight to the final state.
void CltXact::noteAbort() {
    newState(stDone);
}
void CltXact::controlledMasterRead() {
if (!Should(theConn))
return;
if (abortIo(&Connection::read))
return;
if (!Should(theConn))
return;
if (theConn->bad()) {
if (!doRetry) {
const bool hdrWaiting =
theState == stHdrWaiting || theState == stTunnelRespWaiting;
doRetry = hdrWaiting &&
!theConn->theRdBuf.contSize() && theConn->useCnt() > 1;
}
finish(errOther);
return;
}
controlledPostRead();
}
void CltXact::controlledPostRead() {
if (theState == stHdrWaiting) {
if (const Error err = getHeader()) {
finish(err);
return;
}
}
if (theState == stBodyWaiting) {
getBody();
return;
}
}
// Tries to parse the reply header from the connection read buffer.
// Returns a false (zero) Error when the header was parsed and accepted, when
// a successful CONNECT reply was consumed, or when more data is simply
// needed. Otherwise returns the reason: an interpretHeader() error,
// errHugeHdr if the buffer filled up before the header ended, errOther for a
// suspected persistent connection race (doRetry is set), errNoHdrClose if the
// peer closed without sending anything, or errPrematureEoh if it closed in
// the middle of the header.
Error CltXact::getHeader() {
Assert(theState == stHdrWaiting || theState == stTunnelRespWaiting);
if (theRepHdr.parse(theConn->theRdBuf.content(), theConn->theRdBuf.contSize())) {
// change state immediately if we just established a tunnel
if (theState == stTunnelRespWaiting) {
if (theRepHdr.theStatus == RepHdr::sc200_OK) {
// the very first CONNECT reply is always dumped
static int connRespCount = 0;
if (!connRespCount++ || TheOpts.theDumpFlags(dumpRep, dumpHdr))
printMsg(theConn->theRdBuf, theRepHdr.theHdrSize);
// drop the CONNECT reply and start over with the real request
theConn->theRdBuf.consumed(theRepHdr.theHdrSize);
theRepHdr.reset();
newState(stConnWaiting);
theMgr->rewind(this);
return 0;
} else {
// assume the normal code will handle the error
newState(stHdrWaiting); // XXX: not last call in this function
// continue
}
}
const Error err = interpretHeader();
// dump reply header after interpretHeader() to dump more oid flags
// (the very first reply header is always dumped)
static int respCount = 0;
if (!respCount++ || TheOpts.theDumpFlags(dumpRep, dumpHdr))
printMsg(theConn->theRdBuf, theRepHdr.theHdrSize);
if (err)
return err;
// keep a copy of the 407 reply header before it is consumed
if (theHttpStatus == RepHdr::sc407_ProxyAuthRequired)
saveRepHeader();
consume(theRepHdr.theHdrSize);
Assert(theState == stBodyWaiting);
return err;
} else
// the header is incomplete; can we still get the rest of it?
if (expectMore()) {
if (theConn->theRdBuf.full())
return errHugeHdr;
else
return 0;
} else {
// no more data is coming and the header is still incomplete
const bool readNothing = !theConn->theRdBuf.contSize();
// HTTP pconn race condition?
const bool race = readNothing && theConn->useCnt() > 1;
doRetry = doRetry || race;
if (race)
return errOther;
else
return readNothing ? errNoHdrClose : errPrematureEoh;
}
// NOTE(review): every branch above returns, so the two lines below look
// unreachable
Should(false); // not reached
return errOther;
}
// Feeds buffered reply data to the body parser and, once no more data is
// expected, settles the transaction: consumes leftovers, handles proxy
// authentication results, and finishes with success, errExtraRepData, or
// errPrematureEof depending on how the actual reply size compares with the
// expected one. A reply cut short at the configured abort position is
// treated as a success.
void CltXact::getBody() {
Assert(theState == stBodyWaiting);
Assert(theBodyParser);
parse();
// more data may come: just make sure the read buffer is not stuck full
if (expectMore()) {
checkOverflow();
return;
}
const Size leftoverSize = unconsumed();
// no more data, set expected size if it was unknown
if (!theRepSize.expected().known())
theRepSize.expect(theRepSize.actual() + leftoverSize);
const bool pgAborted = cfgAbortedReply();
// make sure we do not leave anything behind and complain if needed
if (leftoverSize > 0) {
if (pgAborted) {
// no need to complain, a Polygraph-initiated abort
} else
if (theRepSize.actual() + leftoverSize < theRepSize.expected()) {
// no need for parser to complain since it is not a content error
} else {
const ParseBuffer leftovers(theConn->theRdBuf.content(),
leftoverSize);
theBodyParser->noteLeftovers(leftovers);
}
consume(leftoverSize);
}
// tell the authentication logic whether the proxy demanded credentials
authWithProxy(theHttpStatus == RepHdr::sc407_ProxyAuthRequired);
// got exactly what was expected
if (theRepSize.expected() == theRepSize.actual()) {
finish(0);
return;
}
// got more than was expected
if (theRepSize.expected() < theRepSize.actual()) {
finish(errExtraRepData);
return;
}
// got less than was expected
Assert(theRepSize.expected() > theRepSize.actual());
if (pgAborted) {
theConn->lastUse(true);
theOid.aborted(true);
finish(0); // not an error, configuration told server to abort
return;
}
finish(errPrematureEof);
}
void CltXact::parse() {
IOBuf &buf = theConn->theRdBuf;
const Size availSize = unconsumed();
const ParseBuffer data = availSize > 0 ?
ParseBuffer(buf.content(), availSize) : ParseBuffer();
if (const Size parsedSz = theBodyParser->noteData(data)) {
consume(parsedSz);
theConn->theRdBuf.pack();
}
if (theRepSize.expectingWhatParsed()) {
Should(!theRepSize.expected().known());
theRepSize.expect(theRepSize.actual());
}
}
void CltXact::checkOverflow() {
const IOBuf &buf = theConn->theRdBuf;
if (buf.contSize() >= buf.capacity()) { // full() requires pack()!
const Size leftoverSize = unconsumed();
const ParseBuffer leftovers(buf.content(), leftoverSize);
theBodyParser->noteOverflow(leftovers);
// if needed, we can let the parser decide how much to chop off
consume(leftoverSize);
}
}
// fill buffer space under manager control
bool CltXact::controlledFill(bool &needMore) {
WrBuf &buf = theConn->theWrBuf;
const Size reservedSpace = buf.capacity()/2;
needMore = buf.spaceSize() < reservedSpace;
if (needMore)
return true;
const char *reqStart = buf.space();
makeReqHeaders();
// append request body if needed
if (theReqBodySize.known()) {
const char *bodyStart = buf.space();
static RndGen rng(LclPermut(rndReqBody));
const Size offset = IOBuf::RandomOffset(rng.ltrial(), 0);
theReqBodySize = buf.appendRndUpTo(offset, theReqBodySize);
theReqSize += theReqBodySize;
if (TheOpts.theDumpFlags(dumpReq, dumpBody))
printMsg(bodyStart, theReqBodySize);
}
if (buf.full()) { // the request does not fit in the reserved buffer
if (ReportError(errReqDontFit)) {
Comment << "reserved buffer capacity: " << reservedSpace << endc;
printMsg(reqStart, theReqSize);
}
finish(errOther);
return false;
}
return true;
}
// one transaction writes buffer under manager control
//
// Writes buffered data to the connection (if there is any) and adds the
// written amount to size. Returns false if the I/O was aborted or the
// connection went bad; in the latter case the transaction is finished with
// errOther and may be marked for a retry.
bool CltXact::controlledMasterWrite(Size &size) {
    WrBuf &wrBuf = theConn->theWrBuf;

    Size written;
    if (wrBuf.contSize() > 0 && abortIo(&Connection::write, &written))
        return false;

    if (theConn->bad()) {
        Assert(theReqSize >= 0);
        // HTTP pconn race condition?
        // (nothing but our request is buffered on a reused connection)
        const bool race =
            wrBuf.contSize() == theReqSize && theConn->useCnt() > 1;
        doRetry = race || doRetry;
        finish(errOther);
        return false;
    }

    size += written;
    return true;
}
// possibly many transactions move on, after a single raw write
//
// Takes this transaction's share out of the written byte count. If size
// covers the whole request, the request size is subtracted from size and
// the transaction starts waiting for the reply; otherwise needMore is set
// and size is left alone. Always returns true.
bool CltXact::controlledPostWrite(Size &size, bool &needMore) {
    needMore = !(size >= theReqSize);
    if (needMore)
        return true;

    // our request has been written completely
    size -= theReqSize;

    if (theState == stSpaceWaiting && theOid.foreignUrl())
        TheEmbedStats.foreignUrlRequested++;

    if (theState == stTunnelSpaceWaiting)
        newState(stTunnelRespWaiting);
    else
        newState(stHdrWaiting);

    return true;
}
// Aborts this transaction on the manager's request: marks it for a retry
// and finishes it with errPipelineAbort.
void CltXact::controlledAbort() {
    // retry if other transactions caused this one to fail
    doRetry = true;
    finish(errPipelineAbort);
}
void CltXact::makeReqHeaders() {
WrBuf &buf = theConn->theWrBuf;
ofixedstream os(buf.space(), buf.spaceSize());
if (theState == stTunnelConnWaiting) {
makeConnectReq(os);
newState(stTunnelSpaceWaiting);
} else
if (theState == stConnWaiting) {
if (theConn->sslConfigured() && !theConn->sslActive())
theConn->sslActivate();
makeExplicitReq(os);
newState(stSpaceWaiting);
}
}
// make a CONNECT request
//
// Writes the CONNECT request line (method, host, HTTP version) to os and
// completes the header. The first CONNECT request is always dumped.
void CltXact::makeConnectReq(ostream &os) {
    static int reqCount = 0;

    os << rlpConnect;
    makeReqHost(os);
    makeReqVersion(os);
    os << crlf;

    const bool firstReq = !reqCount++;
    finishReqHdrs(os, firstReq);
    // no body for CONNECT requests
}
// make a non-CONNECT request
//
// Picks a body size for POST and PUT requests, writes the request line and
// all headers to os, and computes the position at which the request should
// be aborted. The first request is always dumped.
void CltXact::makeExplicitReq(ostream &os) {
    static int reqCount = 0;

    // decide whether the request should have a body
    const bool withBody = theOid.post() || theOid.put();
    if (withBody) {
        static RndGen rng;
        // try to make sure we will have space after the headers;
        // may not work for small buffers (default 16KB is OK)
        theReqBodySize = rng(0, theConn->theWrBuf.spaceSize() / 2);
    }

    // make headers
    makeReqMethod(os);
    makeEndToEndHdrs(os);
    makeHopByHopHdrs(os);
    const bool firstReq = !reqCount++;
    finishReqHdrs(os, firstReq);

    // where we should abort
    theAbortSize = theAbortCoord.pos(theReqSize, theReqBodySize);
}
// Completes the request header that was formatted into os: commits the
// streamed bytes to the write buffer, lets the registered data filters add
// their own headers, appends the terminating CRLF, and records the header
// size in theReqSize. The header is dumped if forceDump is set or if the
// dump options ask for request headers.
void CltXact::finishReqHdrs(ostream &os, bool forceDump) {
    IOBuf &wrBuf = theConn->theWrBuf;
    const char *hdrStart = wrBuf.space();

    // what was written via the stream now becomes buffer content
    wrBuf.appended((streamoff)os.tellp());

    // give filters a chance, they may add their own headers
    TheCltDataFilterRegistry().apply(this, wrBuf);

    // end-of-header; never append more than the space that is left
    wrBuf.append("\r\n", Min(wrBuf.spaceSize(), Size(2)));

    // may be adjusted in makeExplicitReq() if there is a body
    theReqSize = wrBuf.space() - hdrStart;

    // dump request header
    const bool dump = forceDump || TheOpts.theDumpFlags(dumpReq, dumpHdr);
    if (dump)
        printMsg(hdrStart, theReqSize);
}
// Writes the request-line prefix for the method selected in theOid (GET,
// POST, HEAD, or PUT, checked in that order). Asserts if no method is set.
void CltXact::makeReqMethod(ostream &os) {
    if (theOid.get()) {
        os << rlpGet;
        return;
    }
    if (theOid.post()) {
        os << rlpPost;
        return;
    }
    if (theOid.head()) {
        os << rlpHead;
        return;
    }
    if (theOid.put()) {
        os << rlpPut;
        return;
    }
    Assert(false);
}
// Writes the request-line suffix for the owner's HTTP version: the 1.0
// form for versions up to 1.0, the 1.1 form otherwise.
void CltXact::makeReqVersion(ostream &os) {
    const bool oldHttp = theOwner->httpVersion() <= HttpVersion(1,0);
    if (oldHttp)
        os << rlsHttp1p0;
    else
        os << rlsHttp1p1;
}
// Writes the rest of the request line (URL and HTTP version) followed by
// the end-to-end request headers to os: Accept, Accept-Encoding (if
// configured), Host, If-Modified-Since and reload headers (if the oid asks
// for them), the Polygraph-specific X- headers, Content-length (if the
// request has a body), and cookies (if the host keeps any).
void CltXact::makeEndToEndHdrs(ostream &os) {
    if (theOwner->proxy()) {
        // proxied connection, use full URL
        Oid2Url(theOid, os);
    } else {
        // transparent folks get an advantage of shorter request-lines
        Oid2UrlPath(theOid, os);
    }
    makeReqVersion(os);

    /* request-header fields */

    os << hfAccept;
    if (const String *codings = theOwner->cfg()->theAcceptedContentCodings)
        os << hfpAcceptEncoding << *codings << crlf;

    os << hfpHost;
    // null for foreign URLs and when the host map lookup fails
    const HostCfg *host = makeReqHost(os);
    os << crlf;

    if (theOid.imsAny() && olcTimes().lmt() >= 0) {
        // step one second to the side of the modification time that
        // matches the kind of IMS request the oid asks for
        const Time t = theOid.ims200() ?
            (olcTimes().lmt() - Time::Sec(1)) :
            (olcTimes().lmt() + Time::Sec(1));
        HttpDatePrint(os << hfpIMS, t) << crlf;
    }

    if (theOid.reload())
        os << hfReload;

    /* entity-header fields */

    os << hfpXXact << TheGroupId << ' ' << theId << crlf;

    // send public world info; makeReqHost() has already complained if it
    // found no host configuration for a domestic URL, so just skip this
    // section then instead of dereferencing a null pointer
    if (!theOid.foreignUrl() && host) {
        const PubWorld &pubWorld = *host->thePubWorld;
        os << hfpXLocWorld << pubWorld.localSlice() << crlf;
        if (const PubWorldSlice *slice = pubWorld.sliceToSync())
            os << hfpXRemWorld << *slice << crlf;
        if (theSrvRep)
            os << hfpXTarget << theSrvRep->addr() << crlf;
    }

    os << hfpXAbort << theAbortCoord.whether()
        << ' ' << theAbortCoord.where() << crlf;

    // report our readiness to change phase
    os << hfpXPhaseSyncPos << TheStatPhaseMgr.phaseSyncPos() << crlf;

    if (theReqBodySize.known())
        os << "Content-length: " << (int)theReqBodySize << crlf;

    if (host && host->theCookies)
        makeCookies(os, host->theCookies);
}
// Writes the hop-by-hop request headers to os: the connection persistency
// header (proxy or origin flavor) when one is needed, and proxy
// authorization credentials when the owner supplies them.
void CltXact::makeHopByHopHdrs(ostream &os) {
    /* general-header fields */

    // persistency indication depends on HTTP version:
    // up to 1.0 keep-alive has to be requested, later versions have to
    // announce a close
    const bool oldHttp = theOwner->httpVersion() <= HttpVersion(1,0);
    const bool reusable = theConn->reusable();
    if (oldHttp && reusable)
        os << (theOwner->proxy() ? hfConnAlivePxy : hfConnAliveOrg);
    else
    if (!oldHttp && !reusable)
        os << (theOwner->proxy() ? hfConnClosePxy : hfConnCloseOrg);

    if (theOwner->credsForProxy(theOid, theCred)) {
        PrintBase64(os << hfpProxyAuthorization,
            theCred.image().data(), theCred.image().len()) << crlf;
    }
}
// Writes the "host:port" of the request target to os. For a foreign URL
// the address is extracted from the URL itself and null is returned. For a
// domestic URL the address comes from the host map entry of the visible
// server, and that entry is returned; a missing entry triggers Should()
// and yields null.
const HostCfg *CltXact::makeReqHost(ostream &os) {
    if (theOid.foreignUrl()) {
        const char *furi = theOid.foreignUrl().cstr();
        NetAddr addr;
        if (SkipHostInUri(furi, furi+theOid.foreignUrl().len(), addr))
            os << addr.addrA() << ':' << addr.port();
        return 0;
    }

    const HostCfg *host = TheHostMap->at(theOid.viserv());
    if (host) {
        const NetAddr &visName = host->theAddr;
        os << visName.addrA() << ':' << visName.port();
        return host;
    }

    Should(false);
    return 0;
}
// If the owner does cookies, arranges for cookies in the coming reply to
// be collected and writes the kept cookies (up to the owner's keep limit)
// to os as Cookie headers. Stops after the first cookie that does not fit
// into the remaining write buffer space.
void CltXact::makeCookies(ostream &os, HttpCookies *cookies) {
    // configure future response parser to collect new cookies
    int keepLimit = 0;
    if (!theOwner->doCookies(keepLimit))
        return;
    cookies->keepLimit(keepLimit);
    theRepHdr.collectCookies(cookies);

    // send back all cookies we kept (if any)
    const int count = Min(keepLimit, cookies->count());
    Size accSize = 0;
    for (int idx = 0; idx < count; ++idx) {
        const String &cookie = cookies->get(idx);
        const Size cookieSize = cookie.len();

        os << hfpCookie;

        // does the cookie fit after everything streamed so far?
        WrBuf &wrBuf = theConn->theWrBuf;
        const Size usedSize = (streamoff)os.tellp();
        const Size spaceRemaining = wrBuf.spaceSize() - usedSize;
        const bool fit = cookieSize <= spaceRemaining;

        if (!fit && ReportError(errCookiesDontFit)) {
            Comment << "cookie size: " << cookieSize <<
                " cookies buffered: " << idx << '/' << count << " or " <<
                accSize << "; space left: " << spaceRemaining << endc;
        }

        // the cookie is streamed even when it does not fit
        os << cookie; // note: kept cookies contain terminating CRLF;
        accSize += cookieSize;

        if (!fit)
            break;
    }
}
// Removes size bytes from the front of the read buffer and counts them as
// received reply bytes.
void CltXact::consume(Size size) {
    IOBuf &rdBuf = theConn->theRdBuf;
    rdBuf.consumed(size);
    theRepSize.got(size);
}
// Interprets an already parsed reply header: validates the status line,
// derives the expected reply size, classifies the reply (hit or miss,
// first-hand or second-hand, foreign or native), runs the ACL, date, and
// freshness checks, selects the body parser, and switches to stBodyWaiting.
// Returns a false (zero) Error on success or the first fatal problem found;
// non-fatal problems are only reported via noteError()/ReportError().
Error CltXact::interpretHeader() {
// does reply line make sense?
if (!theRepHdr.theHttpVersion.known() || theRepHdr.theStatus < 0)
return errHttpRLine;
if (theSrvRep)
theSrvRep->noteRequest(theOid);
if (const Error err = setStatusCode(theRepHdr.theStatus))
return err;
// check content-length, set RepSize if possible
if (theRepHdr.expectBody()) {
// replies to HEAD requests end with the header
if (theOid.head())
theRepSize.expect(theRepHdr.theHdrSize);
else
if (theRepHdr.theTransferEncoding == MsgHdr::tcChunked) {
if (theRepHdr.theContSize >= 0) // MUST ignore
ReportError(errChunkedButCLen);
} else
if (theRepHdr.theContSize >= 0)
theRepSize.expect(theRepHdr.theHdrSize + theRepHdr.theContSize);
else
// without a length, only a connection close can delimit the body
if (theRepHdr.persistentConnection())
return errPersistButNoCLen;
} else {
// body-less reply: a content length is an error, the header is all we get
if (theRepHdr.theContSize >= 0)
return errUnexpectedCLen;
theRepSize.expect(theRepHdr.theHdrSize);
}
// check that we can handle transfer encoding
if (theRepHdr.theTransferEncoding == MsgHdr::tcOther)
return errUnknownTransferEncoding;
theOid.repToRedir(theRepHdr.redirect());
// Polygraph headers were expected but are missing: the reply is foreign
if (!theRepHdr.polyHeaders() && theRepHdr.expectPolyHeaders()) {
theOid.foreignSrc(true);
noteError(errForeignSrc);
}
if (theOid.foreignUrl())
TheEmbedStats.foreignUrlReceived++;
checkAcl();
// firstHand here means our request reached the server and
// server's reply reached us
const bool firstHand = reqId().myMutant(theRepHdr.theXactId);
theOid.cachable(theRepHdr.isCachable);
// a hit is a reply to a basic request that carries somebody else's
// transaction id
theOid.hit(!firstHand && theRepHdr.theXactId && theOid.basic());
if (theOid.hit()) {
if (!theOid.offeredHit() && !TheCltOpts.ignoreFalseHits)
noteError(errFalseHit);
if (!theOid.cachable())
noteError(errUnchbHit);
}
if (firstHand) { // native miss
checkDateSync();
firstHandSync();
} else
if (theRepHdr.theXactId) { // native second-hand response
if (theOid.reload())
noteError(errReloadHit);
} else
if (theRepHdr.theDate >= 0 && theRepHdr.theDate < HttpDateAtMost(theStartTime)) {
// our response is aged: it was generated before we asked for it
if (theOid.reload())
noteError(errReloadHit);
}
if (theOid.offeredHit() && !theOid.hit())
noteError(errFalseMiss);
checkFreshness();
// note if the server will close the connection
if (!theRepHdr.persistentConnection()) {
theConn->lastUse(true);
theMgr->noteLastXaction(this);
}
theBodyParser = selectBodyParser();
newState(stBodyWaiting);
return 0;
}
// Reports a non-fatal problem found while interpreting a reply. False
// misses are never reported as errors (only printed if configured), and
// foreign-source replies are suppressed in several cases. Everything else
// goes through ReportError(); if that asks for details, stale and reload
// hits get their relevant dates printed and the message may be dumped.
void CltXact::noteError(const Error &err) {
if (err == errFalseMiss) {
if (TheCltOpts.printFalseMisses) {
Oid2Url(theOid, cout << "False-Miss: ");
cout << endl;
printMsg(theConn->theRdBuf, theRepHdr.theHdrSize);
}
return; // do not report false misses; they may not be errors
}
if (err == errForeignSrc) {
if (theOid.foreignUrl())
return; // response to a foreign URL may be foreign
// the first foreign response to a Polygraph URL is logged once per process
static bool informed = false;
if (!informed) {
Comment(5) << "fyi: received first foreign response to " <<
"Polygraph-specific URL:" << endc;
printMsg(theConn->theRdBuf);
informed = true;
}
if (theHttpStatus == RepHdr::sc200_OK && theOwner->cfg()->acl())
return; // let checkAcl() handle this case
if (TheOpts.acceptForeignMsgs)
return; // the user told us to accept all foreign responses
}
// ReportError() decides whether details should be supplied
if (!ReportError(err))
return;
/* supply additional information */
if (err == errStaleHit || err == errReloadHit) {
HttpDatePrint(cout << "\trep: ", theRepHdr.theDate) << endl;
HttpDatePrint(cout << "\tlmt: ", olcTimes().lmt()) << endl;
HttpDatePrint(cout << "\treq: ", theStartTime) << endl;
HttpDatePrint(cout << "\tnow: " ) << endl;
HttpDatePrint(cout << "\texp: ", olcTimes().exp()) << endl;
}
if (TheOpts.theDumpFlags(dumpErr, dumpAny))
printMsg(theConn->theRdBuf);
}
// Forgets the credentials attached to this transaction.
void CltXact::noteCredentialsGone() {
    theCred.reset();
}
void CltXact::saveRepHeader() {
theConn->theRdBuf.copyContent(theSavedRepHeader, theRepHdr.theHdrSize);
}
// Synchronizes local state with the information a first-hand reply
// carries: the remote public world slice, the server's object accounting,
// and the phase synchronization position.
void CltXact::firstHandSync() {
    // update public world info
    if (theRepHdr.theRemWorld)
        updatePubWorld(theRepHdr.theRemWorld);

    // sync oid accounting on first-hand replies
    if (theSrvRep)
        theSrvRep->syncSrvInfo(theRepHdr);

    // sync phases; needs both a position and the group it belongs to
    if (theRepHdr.thePhaseSyncPos >= 0 && theRepHdr.theGroupId) {
        TheStatPhaseSync.notePhaseSync(theRepHdr.theGroupId,
            theRepHdr.thePhaseSyncPos);
    }
}
// Compares the proxy's authentication verdict with the credentials we
// sent. needed tells whether the proxy demanded authentication. A demand
// in response to no credentials schedules a retry; a denial of valid
// credentials or an acceptance of invalid ones is reported as an error.
void CltXact::authWithProxy(bool needed) {
    bool explain = false;

    if (!needed) { // allowed
        if (theCred.image() && !theCred.valid()) // sent invalid
            explain = ReportError(errProxyAuthAllowed);
    } else
    if (!theCred.image()) { // denied; sent nothing
        doRetry = true;
        theOwner->noteProxyAuthReq(this);
    } else
    if (theCred.valid()) { // denied; sent valid
        explain = ReportError(errProxyAuthDenied);
    }

    if (!explain)
        return;

    Comment << "robot: " << theOwner->host()
        << " credentials: " << theCred.image()
        << " proxy: " << theOwner->proxy() << endc;
}
void CltXact::redirect() {
ObjId dest = theRepHdr.theLocn.oid;
dest.rediredReq(true);
dest.get(true);
if (!dest.foreignUrl()) {
if (!validRelOid(dest)) {
ReportError(errRedirLocation);
return;
}
if (theRepHdr.theLocn.host) {
if (const Error err = setViserv(theRepHdr.theLocn.host, dest)) {
noteError(err);
return;
}
} else {
dest.viserv(theOid.viserv());
}
theOwner->selectTarget(dest);
dest.repeat(true); // polysrv only redirects to seen URLs
} else {
dest.repeat(false); // XXX: may be a repeat, we do not know
}
// XXX: hot() is unset
theOwner->noteRedirect(this, dest);
}
// Returns the parser for the reply body: the content parser, wrapped in a
// chunked coding parser if the reply uses chunked encoding.
BodyParser *CltXact::selectBodyParser() {
    BodyParser *contentParser = selectContentParser();
    if (!theRepHdr.chunkedEncoding())
        return contentParser;
    return ChunkedCodingParser::GetOne(this, contentParser);
}
// Returns the parser for the reply content: a markup parser if the content
// is, looks like, or may contain markup, and the catch-all parser
// otherwise.
BodyParser *CltXact::selectContentParser() {
    const bool parseAsMarkup =
        // parse if content is markup
        theRepHdr.markupContent() ||
        // if content is unknown, use URL extension (if any) to guess type
        (!theRepHdr.knownContentType() &&
            OidImpliesMarkup(theOid, theContentCfg)) ||
        // parse if domestic content may have embedded tags
        (!theOid.foreignUrl() && theContentCfg->hasEmbedCont());

    if (parseAsMarkup)
        return selectMarkupBodyParser();

    // do not parse otherwise
    return AnyBodyParser::GetOne(this);
}
// Returns a parser for markup content, creating a page (started at the
// transaction start time) if the transaction has none yet. The URI script
// parser is used when the owner's configuration says that all URIs of this
// reply should be followed; the markup parser is used otherwise.
BodyParser *CltXact::selectMarkupBodyParser() {
    if (!thePage) {
        thePage = PageInfo::Create();
        thePage->start = theStartTime;
    }

    const bool followAll = theOwner->cfg()->followAllUris(theRepHdr);
    if (followAll)
        return UriScriptBodyParser::GetOne(this, theOwner->cfg());

    return MarkupBodyParser::GetOne(this, theOwner->cfg());
}
// Tells whether more reply data may still arrive: never after EOF, always
// while the expected reply size is unknown, and otherwise only while the
// received and buffered bytes fall short of the expected size.
bool CltXact::expectMore() const {
    if (theConn->atEof())
        return false;

    const bool sizeKnown = theRepSize.expected().known();
    if (!sizeKnown)
        return true;

    return theRepSize.expected() > theRepSize.actual() + unconsumed();
}
// Returns how much of the read buffer content theRepSize expects to take
// for this reply.
Size CltXact::unconsumed() const {
    const IOBuf &rdBuf = theConn->theRdBuf;
    return theRepSize.expectToGet(rdBuf.contSize());
}
// Returns the object life cycle times of the requested oid. The times are
// calculated on demand, and only if a content configuration is available.
const ObjTimes &CltXact::olcTimes() const {
    // not calculated or not configured, but can be
    const bool missing = theOlcTimes.lmt() < 0;
    if (missing && theContentCfg)
        theContentCfg->calcTimes(theOid, theOlcTimes);

    return theOlcTimes;
}
// Tells whether oid looks like a usable domestic object id: it has a
// world, a positive name, and a content type known to the content manager.
bool CltXact::validRelOid(const ObjId &oid) const {
    if (oid.world() && oid.name() > 0)
        return TheContentMgr.validId(oid.type());
    return false;
}
// Verifies that the reply status agrees with the configured access
// controls: a positive status must match the "rewrite" or "allow" rule
// group, anything else the "deny" group. A 407 reply is not checked while
// authentication is still pending or after invalid credentials were sent.
void CltXact::checkAcl() {
    const AclGroup &acl = theOwner->cfg()->acl();

    // no access controls configured, or no check needed for this URL kind
    if (!acl || !acl.needsCheck(theOid.foreignUrl()))
        return;

    if (RepHdr::PositiveStatusCode(theHttpStatus)) {
        // a foreign reply to a domestic URL must have been rewritten
        if (!theOid.foreignUrl() && theOid.foreignSrc())
            checkAclMatch(acl.rewrite(), "rewritten");
        else
            checkAclMatch(acl.allow(), "allowed");
        return;
    }

    if (theHttpStatus == RepHdr::sc407_ProxyAuthRequired) {
        const bool skipCheck =
            // do not check acls if we did not authenticate yet but can
            (!theCred.image() && theOwner->hasCredentials()) ||
            // do not check acls if we sent invalid credentials
            (theCred.image() && !theCred.valid());
        if (skipCheck)
            return;
    }

    checkAclMatch(acl.deny(), "denied");
}
void CltXact::checkAclMatch(const RegExGroup *matchedGrp, const char *action) {
RegExMatchee m;
buildAclMatchee(m);
static Array<RegExGroup*> matches;
matches.reset();
theOwner->cfg()->acl().match(m, matches);
bool explainMatch = false;
if (matches.count() == 0)
explainMatch = ReportError(errAclNoMatches);
else
if (matches.count() == 1 && matches.last() != matchedGrp)
explainMatch = ReportError(errAclWrongMatch);
else
if (matches.count() > 1)
explainMatch = ReportError(errAclManyMatches);
if (explainMatch)
explainAclMatch(m, action, matches);
}
// Fills m with what ACL rules are matched against: the URL host, the URL
// path, the complete URL, the user name, and the owner's memberships. The
// three URL strings are formatted back to back, each followed by a
// terminator, into a function-static buffer, so the pointers stored in m
// stay valid only until the next call.
void CltXact::buildAclMatchee(RegExMatchee &m) const {
// form complete URL
static WrBuf buf;
buf.reset();
// one byte is kept out of the stream's reach for the final terminator
ofixedstream os(buf.space(), buf.spaceSize()-Size(1));
m.urlHost = buf.space() + 0; // URL host alone
Oid2UrlHost(theOid, os);
Should(os << ends);
// the next string starts where the stream currently is
m.urlPath = buf.space() + os.tellp(); // URL path alone
Oid2UrlPath(theOid, os);
Should(os << ends);
m.url = buf.space() + os.tellp(); // complete URL
Oid2Url(theOid, os);
Should(os << ends);
buf.appended(Size(os.tellp()));
buf.append("", 1); // terminate even if stream is full
m.userName = theCred.image().cstr();
m.memberships = &theOwner->memberships();
}
// Logs the details of an unexpected ACL match: the URL, the user (if the
// credentials are valid), the user's groups in each membership map (the
// listing is cut short after the map at index 10), the apparent action,
// and the names of all rules that matched.
void CltXact::explainAclMatch(const RegExMatchee &m, const char *action, const Array<RegExGroup*> &ourMatches) const {
    ostream &os = Comment(6) << "URL: " << m.url << endl;

    if (theCred.valid())
        os << "\tuser: " << theCred.image() << endl;

    os << "\tmembership maps: " << m.memberships->count() << endl;

    os << "\tgroups: ";
    for (int mapIdx = 0; mapIdx < m.memberships->count(); ++mapIdx) {
        if (mapIdx > 0)
            os << "; ";
        dumpMatchingGroupNames(os, m.memberships->item(mapIdx));
        if (mapIdx >= 10) {
            os << "; ...";
            break;
        }
    }
    os << endl;

    os << "\twas probably " << action
        << " but matches " << ourMatches.count() << " rule(s)";
    for (int ruleIdx = 0; ruleIdx < ourMatches.count(); ++ruleIdx) {
        if (ruleIdx == 0)
            os << ": ";
        else
            os << ", ";
        os << theOwner->cfg()->acl().ruleName(ourMatches[ruleIdx]);
    }
    os << endc;
}
// Writes a comma-separated list of the groups that map's group iterator
// yields for our credentials; at most six names are printed.
void CltXact::dumpMatchingGroupNames(ostream &os, const MembershipMap *map) const {
    MembershipMap::GroupIterator grp = map->groupIterator(theCred);
    int printed = 0;
    while (grp && printed <= 5) {
        if (printed)
            os << ", ";
        os << *grp;
        ++printed;
        ++grp;
    }
}
// Reports a stale hit if the reply was generated before the object's last
// modification time. Replies without a date and objects modified at or
// after the request start are not checked.
void CltXact::checkFreshness() {
    // cannot check without the date header
    const bool dated = theRepHdr.theDate >= 0;
    if (!dated)
        return;

    // skip in-transit modifications to avoid false errors due to racing
    if (HttpDateAtMost(theStartTime) <= olcTimes().lmt())
        return;

    // response is stale if it was generated before modification time
    const bool stale = theRepHdr.theDate < olcTimes().lmt();
    if (stale)
        noteError(errStaleHit);
}
// Tells whether the reply has reached the position at which the reply
// header's abort coordinates say the reply is to be aborted. Requires a
// known expected reply size. Returns false if the header carries no abort
// coordinates, if they yield no (negative) abort position, or if that
// position has not been reached yet.
bool CltXact::cfgAbortedReply() const {
    Assert(theRepSize.expected().known());

    if (!theRepHdr.theAbortCoord)
        return false;

    // abort position for this header size and expected body size
    const Size abSz = theRepHdr.theAbortCoord.pos(theRepHdr.theHdrSize,
        theRepSize.expected()-theRepHdr.theHdrSize);
    if (abSz < 0)
        return false;

    /* expecting abort, sooner or later */

    // we may have leftovers if we were parsing aborted content,
    // so count them when checking whether we reached abort level
    return abSz <= theRepSize.actual() + unconsumed();
}
// Remembers the reply status code and tells whether it is one we can deal
// with: 200, 304, 403, 407, or any status the reply header classifies as a
// redirect. Everything else yields errHttpStatusCode.
Error CltXact::setStatusCode(int aStatus) {
    theHttpStatus = aStatus;

    const bool supported =
        theHttpStatus == RepHdr::sc200_OK ||
        theHttpStatus == RepHdr::sc304_NotModified ||
        theHttpStatus == RepHdr::sc403_Forbidden ||
        theHttpStatus == RepHdr::sc407_ProxyAuthRequired ||
        theRepHdr.redirect();
    if (supported)
        return Error();

    return errHttpStatusCode;
}
// Checks that the reply's Date header is within a minute of the local
// clock and reports errSyncDate (with details) if it is not. A missing
// date is reported as errHttpNoDate. Replies that took half a minute or
// longer are not checked.
void CltXact::checkDateSync() {
    if (theRepHdr.theDate < 0) {
        ReportError(errHttpNoDate);
        return;
    }

    const Time maxGap = Time::Sec(60);

    // cannot measure drift using replies that took too long
    if (TheClock - theStartTime >= maxGap/2)
        return;

    const bool outOfSync =
        theRepHdr.theDate > TheClock.time() + maxGap ||
        theRepHdr.theDate < TheClock.time() - maxGap;
    if (!outOfSync)
        return;

    if (!ReportError(errSyncDate))
        return;

    HttpDatePrint(Comment << "request generated: ", theStartTime) << endc;
    HttpDatePrint(Comment << "response generated: ", theRepHdr.theDate) << endc;
    HttpDatePrint(Comment << "response received: ", TheClock.time()) << endc;
    const Time diff = theRepHdr.theDate - TheClock.time();
    Comment << "time difference: " << diff << endc;
}
// Merges slice into the public world of the visible server we talked to:
// an already known slice is updated, an unknown one is added.
void CltXact::updatePubWorld(const ObjWorld &slice) {
    PubWorld &pubWorld = *TheHostMap->at(theOid.viserv())->thePubWorld;

    int knownIdx = -1;
    const bool known = pubWorld.find(slice.id(), knownIdx);
    if (known)
        pubWorld.sliceAt(knownIdx).update(slice);
    else
        pubWorld.add(slice);
}
// called by BodyParsers, must not finish()
//
// Adds parsed content to the running checksum, but only if the reply
// header carries a checksum to compare with.
void CltXact::noteContent(const ParseBuffer &content) {
    if (!theRepHdr.theChecksum.set())
        return;

    theCheckAlg.update(content.data(), content.size());
}
// called by BodyParsers, must not finish()
//
// Handles a URL found in the reply body: resolves it as a foreign or a
// domestic oid, lets the new oid inherit this transaction's request
// properties, and passes it to the owner. Returns the resolution error, if
// any; in that case the owner is not told.
Error CltXact::noteEmbedded(ReqHdr &hdr) {
    TheEmbedStats.urlSeen++;

    const Error err = hdr.theUri.oid.foreignUrl() ?
        handleForeignEmbedOid(hdr) : handleEmbedOid(hdr);
    if (err)
        return err;

    // "fix" new oid record
    ObjId &embOid = hdr.theUri.oid;
    embOid.repeat(theOid.repeat());
    embOid.hot(theOid.hot());
    embOid.ims200(theOid.ims200());
    embOid.ims304(theOid.ims304());
    embOid.reload(theOid.reload());
    embOid.get(true);

    theOwner->noteEmbedded(this, embOid);
    return err;
}
// called by BodyParsers, must not finish()
//
// Reports a trailer header as an error and prints the header when
// ReportError() asks for details.
void CltXact::noteTrailerHeader(const ParseBuffer &hdr) {
    const bool printDetails = ReportError(errTrailerHeader);
    if (printDetails)
        printMsg(hdr.data(), hdr.size());
}
// called by BodyParsers, must not finish()
//
// Marks the reply size as "whatever has been parsed": the reply ends where
// the trailer ends. Expected to be called once per reply.
void CltXact::noteEndOfTrailer() {
    Should(!theRepSize.expectingWhatParsed());
    theRepSize.expectingWhatParsed(true);
}
// Resolves a domestic embedded URL: validates the oid, binds it to a
// visible server (the one named in the URL or, for relative URLs, ours),
// and asks the owner to select a target for it. Returns errBadEmbedUri for
// an invalid oid, or whatever setViserv() complains about.
Error CltXact::handleEmbedOid(ReqHdr &hdr) {
    if (!validRelOid(hdr.theUri.oid))
        return errBadEmbedUri;

    Error err;
    if (hdr.theUri.host)
        err = setViserv(hdr.theUri.host, hdr.theUri.oid);
    else
        hdr.theUri.oid.viserv(theOid.viserv()); // relative URL

    if (!err)
        theOwner->selectTarget(hdr.theUri.oid);

    return err;
}
// Resolves a foreign embedded URL. A URL without a host part is bound to
// our visible server and rewritten into an absolute http:// URL on that
// server's address; a URL with a host is left as is. Always returns "no
// error".
Error CltXact::handleForeignEmbedOid(ReqHdr &hdr) {
    //if (strncmp(hdr.theUri.pathBuf, "/cgi-bin/cntmgr.pl", 18) == 0)
    //	TheEmbedStats.scriptMgrUrlSeen++;

    // hdr.theUri.oid is already set
    if (hdr.theUri.host)
        return Error();

    // must set host name for Client to know where to send requests
    hdr.theUri.oid.viserv(theOid.viserv());
    const NetAddr &visName = TheHostMap->at(theOid.viserv())->theAddr;

    // the last byte stays out of the stream's reach so that the buffer
    // can be terminated even if the URL does not fit
    char urlBuf[4*1024];
    ofixedstream os(urlBuf, sizeof(urlBuf)-1);
    os << "http://" << visName.addrA() << ':' << visName.port()
        << hdr.theUri.oid.foreignUrl() << ends;
    urlBuf[sizeof(urlBuf)-1] = '\0';
    hdr.theUri.oid.foreignUrl(urlBuf);

    //if (TheEmbedStats.scriptUrlSeen % 1000 == 0) {
    //	(clog << here << "tag url: ").write(hdr.theUri.pathBuf, hdr.theUri.pathLen);
    //	clog << endl;
    //}

    return Error();
}
// Binds oid to the visible server called name. The name must be in the
// host map (else errForeignHostName) and must denote the visible server of
// the current request (else errSrvRedirect).
Error CltXact::setViserv(const NetAddr &name, ObjId &oid) const {
    // XXX: merge with SrvXact::setViserv() ?
    int foundViserv = -1;
    if (!TheHostMap->find(name, foundViserv))
        return errForeignHostName;

    // disallow for now
    if (foundViserv != theOid.viserv())
        return errSrvRedirect;

    oid.viserv(foundViserv);
    return Error();
}
// Logs the base transaction statistics, then the owner's seqvId() value
// and the index of the server representative (-1 if there is none).
void CltXact::logStats(OLog &ol) const {
    Xaction::logStats(ol);

    ol << theOwner->seqvId();

    if (theSrvRep)
        ol << theSrvRep->serverIdx();
    else
        ol << -1;
}
// syntax highlighted by Code2HTML, v. 0.9.1