
/* Web Polygraph       http://www.web-polygraph.org/
 * (C) 2003-2006 The Measurement Factory
 * Licensed under the Apache License, Version 2.0 */

#include "base/polygraph.h"

// NOTE(review): the next two directives have no header name in this copy
// (presumably <...> includes whose names were lost); restore before building.
#include
#include
#include "xstd/h/sstream.h"
#include "xstd/h/iomanip.h"

#include "xstd/Clock.h"
#include "xstd/gadgets.h"
#include "base/polyLogCats.h"
#include "base/RndPermut.h"
#include "base/AddrParsers.h"
#include "runtime/Connection.h"
#include "runtime/AddrMap.h"
#include "runtime/HostMap.h"
#include "runtime/PubWorld.h"
#include "runtime/HttpDate.h"
#include "runtime/StatPhase.h"
#include "runtime/StatPhaseMgr.h"
#include "runtime/StatPhaseSync.h"
#include "runtime/SharedOpts.h"
#include "runtime/PageInfo.h"
#include "runtime/ConnMgr.h"
#include "runtime/HttpCookies.h"
#include "runtime/ErrorMgr.h"
#include "runtime/LogComment.h"
#include "runtime/httpText.h"
#include "runtime/polyBcastChannels.h"
#include "runtime/polyErrors.h"
#include "runtime/globals.h"
#include "csm/ContentCfg.h"
#include "csm/ContentMgr.h"
#include "csm/oid2Url.h"
#include "client/CltOpts.h"
#include "client/Client.h"
#include "client/ServerRep.h"
#include "client/PrivCache.h"
#include "client/CltDataFilterRegistry.h"
#include "client/ChunkedCodingParser.h"
#include "client/AnyBodyParser.h"
#include "client/MarkupBodyParser.h"
#include "client/UriScriptBodyParser.h"
#include "client/ParseBuffer.h"
#include "client/RegExGroups.h"
#include "client/MembershipMap.h"
#include "client/AclGroup.h"
#include "client/CltCfg.h"
#include "client/PipelinedCxm.h"
#include "client/CltXact.h"

// farm of reusable pipeline managers; see getPipeline() and reset()
// NOTE(review): ObjFarm appears without template arguments in this copy;
// usage below (get() yields PipelinedCxm*) suggests they were lost -- confirm.
static ObjFarm ThePipelineMgrs;


// a new transaction starts with no owner, page, or manager
CltXact::CltXact(): theOwner(0), thePage(0), theMgr(0), theSelfMgr(*this) {
	CltXact::reset();
}

// returns the transaction to its initial state, giving up the page
// and the manager (if any) it was using
void CltXact::reset() {
	Xaction::reset();

	theOwner = 0;

	if (thePage)
		PageInfo::Abandon(thePage);

	if (theMgr) {
		// a shared manager goes back to the farm once its use level
		// drops to zero; our own theSelfMgr is never farmed
		if (theMgr != &theSelfMgr && --theMgr->useLevel <= 0)
			ThePipelineMgrs.put((PipelinedCxm*)theMgr);
		theMgr = 0;
	}

	theSrvRep = 0;
	theContentCfg = 0;
	theBodyParser = 0;
	theNextHop = NetAddr();
	theRepHdr.reset();
	theOlcTimes.reset();
	theReqBodySize = -1;
	thePeerState = peerUnknown;
	theCred.reset();
	theSavedRepHeader.reset();
	theCause = 0;
	theChildCount = 0;
	doRetry = false;

	theLogCat = lgcCltSide;
}

Agent *CltXact::owner() {
	return theOwner;
}

const UniqId &CltXact::reqId() const {
	return theId;
}

// remembers when the transaction was queued
void CltXact::enqueue() {
	theEnqueTime = TheClock;
}

// returns the manager that other transactions should join to be pipelined
// behind this one, or 0 if the connection cannot be pipelined
CltXactMgr *CltXact::getPipeline() {
	Assert(theMgr);

	if (!theConn->pipelineable())
		return 0;

	// if we are not already pipelining, create a pipeline manager
	if (theMgr == &theSelfMgr) {
		ThePipelineMgrs.limit(1024); // magic, no good way to estimate
		Assert(theState == stBodyWaiting);
		theSelfMgr.releaseReadControl(this);
		PipelinedCxm *mgr = ThePipelineMgrs.get();
		++mgr->useLevel;
		mgr->assumeReadControl(this, theMgr);
		theMgr = mgr;
	}

	return theMgr;
}

// joins the pipeline run by aMgr, using that manager's connection
void CltXact::pipeline(CltXactMgr *aMgr) {
	Assert(theMgr != aMgr); // or we will overincrement useLevel
	theMgr = aMgr;
	++theMgr->useLevel;
	theConn = theMgr->conn();
	Assert(theConn);
	theConn->startUse();
}

// associates the transaction with a page; expects that no page is set yet
void CltXact::page(PageInfo *aPage) {
	if (Should(!thePage && aPage))
		thePage = PageInfo::Use(aPage);
}

PageInfo *CltXact::page() {
	return thePage;
}

// starts the transaction on aConn on behalf of anOwner;
// theOid must already be set
void CltXact::exec(Client *anOwner, Connection *aConn) {
	Assert(theOid);
	Assert(anOwner);
	theOwner = anOwner;

	// manage ourselves unless pipeline() already gave us a manager
	if (!theMgr)
		theMgr = &theSelfMgr;

	start(aConn);

	if (!theOid.foreignUrl()) {
		theSrvRep = TheHostMap->serverRepAt(theOid.viserv());
		theContentCfg = TheContentMgr.get(theOid.type());
	}

	theOwner->cfg()->selectAbortCoord(theAbortCoord);

	Should (!theConn->tunneling() || theConn->sslConfigured());
	newState(theConn->tunneling() && !theConn->sslActive() ?
		stTunnelConnWaiting : stConnWaiting); // XXX: not last call

	theMgr->control(this);
}

// ends the transaction: verifies the body checksum (if any), updates the
// private cache, informs the manager and the page, and notifies the owner
void CltXact::finish(Error err) {
	if (!err && theRepHdr.redirect())
		redirect();

	// special actions for objects with bodies
	if (!theOid.head() && theBodyParser && theBodyParser->used()) {
		// check MD5
		if (!err && !theOid.aborted() && theRepHdr.theChecksum.set()) {
			theCheckAlg.final();
			if (!theCheckAlg.sum().equal(theRepHdr.theChecksum)) {
				if (ReportError(errChecksum)) {
					theRepHdr.theChecksum.print(Comment << "MD5 expected: ") << endc;
					theCheckAlg.sum().print(Comment << "MD5 received: ") << endc;
					if (TheOpts.theDumpFlags(dumpErr, dumpAny))
						printMsg(theConn->theRdBuf);
				}
				err = errOther;
			}
		}

		// cache
		if (!err && theOid.cachable()) {
			if (PrivCache *cache = theOwner->privCache())
				cache->storeOid(theOid);
		}
	}

	if (err && theOwner->privCache())
		theOwner->privCache()->purgeOid(theOid);

	if (err) {
		// the manager is not told about aborts caused by a pipeline abort
		if (err != errPipelineAbort)
			theMgr->noteAbort(this);
	} else {
		theMgr->noteDone(this);
	}

	if (thePage) {
		thePage->size += theRepSize.actual();
		// the last user of the page announces the end of the page
		if (thePage->loneUser() && Should(thePage->start >= 0)) {
			thePage->lifeTime = TheClock - thePage->start;
			Broadcast(ThePageEndChannel, thePage);
		}
		PageInfo::Abandon(thePage);
	}

	Xaction::finish(err);

	if (theBodyParser) {
		theBodyParser->farm().put(theBodyParser);
		theBodyParser = 0;
	}

	theOwner->noteXactDone(this);
}

void CltXact::noteAbort() {
	newState(stDone);
}

// reads from the connection and, unless the read fails, processes
// whatever is buffered via controlledPostRead()
void CltXact::controlledMasterRead() {
	if (!Should(theConn))
		return;

	if (abortIo(&Connection::read))
		return;

	if (!Should(theConn))
		return;

	if (theConn->bad()) {
		if (!doRetry) {
			// retry if a reused connection failed before we got any
			// response bytes
			const bool hdrWaiting = theState == stHdrWaiting ||
				theState == stTunnelRespWaiting;
			doRetry = hdrWaiting && !theConn->theRdBuf.contSize() &&
				theConn->useCnt() > 1;
		}
		finish(errOther);
		return;
	}

	controlledPostRead();
}

// processes buffered response data: header first, then body
// NOTE(review): getHeader() also handles stTunnelRespWaiting, but it is
// called here in stHdrWaiting state only -- confirm that is intended.
void CltXact::controlledPostRead() {
	if (theState == stHdrWaiting) {
		if (const Error err = getHeader()) {
			finish(err);
			return;
		}
	}

	if (theState == stBodyWaiting) {
		getBody();
		return;
	}
}

// tries to parse the response header from the read buffer;
// returns an error if the header is bad or cannot be completed and
// no error (0) if it was handled or more data is needed
Error CltXact::getHeader() {
	Assert(theState == stHdrWaiting || theState == stTunnelRespWaiting);

	if (theRepHdr.parse(theConn->theRdBuf.content(), theConn->theRdBuf.contSize())) {
		// change state immediately if we just established a tunnel
		if (theState == stTunnelRespWaiting) {
			if (theRepHdr.theStatus == RepHdr::sc200_OK) {
				// always dump the very first CONNECT response
				static int connRespCount = 0;
				if (!connRespCount++ || TheOpts.theDumpFlags(dumpRep, dumpHdr))
					printMsg(theConn->theRdBuf, theRepHdr.theHdrSize);
				theConn->theRdBuf.consumed(theRepHdr.theHdrSize);
				theRepHdr.reset();
				newState(stConnWaiting);
				theMgr->rewind(this);
				return 0;
			} else {
				// assume the normal code will handle the error
				newState(stHdrWaiting); // XXX: not last call in this function
				// continue
			}
		}

		const Error err = interpretHeader();

		// dump reply header after interpretHeader() to dump more oid flags
		static int respCount = 0;
		if (!respCount++ || TheOpts.theDumpFlags(dumpRep, dumpHdr))
			printMsg(theConn->theRdBuf, theRepHdr.theHdrSize);

		if (err)
			return err;

		if (theHttpStatus == RepHdr::sc407_ProxyAuthRequired)
			saveRepHeader();

		consume(theRepHdr.theHdrSize);
		Assert(theState == stBodyWaiting);
		return err;
	} else
	if (expectMore()) {
		if (theConn->theRdBuf.full())
			return errHugeHdr;
		else
			return 0;
	} else {
		const bool readNothing = !theConn->theRdBuf.contSize();
		// HTTP pconn race condition?
		const bool race = readNothing && theConn->useCnt() > 1;
		doRetry = doRetry || race;
		if (race)
			return errOther;
		else
			return readNothing ? errNoHdrClose : errPrematureEoh;
	}

	Should(false); // not reached
	return errOther;
}

// feeds buffered body data to the parser and finishes the transaction
// when no more data is expected
void CltXact::getBody() {
	Assert(theState == stBodyWaiting);
	Assert(theBodyParser);

	parse();

	if (expectMore()) {
		checkOverflow();
		return;
	}

	const Size leftoverSize = unconsumed();

	// no more data, set expected size if it was unknown
	if (!theRepSize.expected().known())
		theRepSize.expect(theRepSize.actual() + leftoverSize);

	const bool pgAborted = cfgAbortedReply();

	// make sure we do not leave anything behind and complain if needed
	if (leftoverSize > 0) {
		if (pgAborted) {
			// no need to complain, a Polygraph-initiated abort
		} else
		if (theRepSize.actual() + leftoverSize < theRepSize.expected()) {
			// no need for parser to complain since it is not a content error
		} else {
			const ParseBuffer leftovers(theConn->theRdBuf.content(), leftoverSize);
			theBodyParser->noteLeftovers(leftovers);
		}
		consume(leftoverSize);
	}

	authWithProxy(theHttpStatus == RepHdr::sc407_ProxyAuthRequired);

	if (theRepSize.expected() == theRepSize.actual()) {
		finish(0);
		return;
	}

	if (theRepSize.expected() < theRepSize.actual()) {
		finish(errExtraRepData);
		return;
	}

	Assert(theRepSize.expected() > theRepSize.actual());
	if (pgAborted) {
		theConn->lastUse(true);
		theOid.aborted(true);
		finish(0); // not an error, configuration told server to abort
		return;
	}

	finish(errPrematureEof);
}

// hands unconsumed body bytes to the body parser and consumes
// whatever the parser reports as parsed
void CltXact::parse() {
	IOBuf &buf = theConn->theRdBuf;
	const Size availSize = unconsumed();
	const ParseBuffer data = availSize > 0 ?
		ParseBuffer(buf.content(), availSize) : ParseBuffer();

	if (const Size parsedSz = theBodyParser->noteData(data)) {
		consume(parsedSz);
		theConn->theRdBuf.pack();
	}

	// see noteEndOfTrailer(): what we parsed is the entire response
	if (theRepSize.expectingWhatParsed()) {
		Should(!theRepSize.expected().known());
		theRepSize.expect(theRepSize.actual());
	}
}

// if the read buffer is completely filled with content the parser did not
// take, tells the parser about the overflow and discards that content
void CltXact::checkOverflow() {
	const IOBuf &buf = theConn->theRdBuf;
	if (buf.contSize() >= buf.capacity()) { // full() requires pack()!
		const Size leftoverSize = unconsumed();
		const ParseBuffer leftovers(buf.content(), leftoverSize);
		theBodyParser->noteOverflow(leftovers);
		// if needed, we can let the parser decide how much to chop off
		consume(leftoverSize);
	}
}

// fill buffer space under manager control
// sets needMore (and writes nothing) if less than half of the write buffer
// is free; returns false if the transaction had to be finished
bool CltXact::controlledFill(bool &needMore) {
	WrBuf &buf = theConn->theWrBuf;

	const Size reservedSpace = buf.capacity()/2;
	needMore = buf.spaceSize() < reservedSpace;
	if (needMore)
		return true;

	const char *reqStart = buf.space();
	makeReqHeaders();

	// append request body if needed
	if (theReqBodySize.known()) {
		const char *bodyStart = buf.space();
		static RndGen rng(LclPermut(rndReqBody));
		const Size offset = IOBuf::RandomOffset(rng.ltrial(), 0);
		// appendRndUpTo() result replaces the planned body size
		theReqBodySize = buf.appendRndUpTo(offset, theReqBodySize);
		theReqSize += theReqBodySize;

		if (TheOpts.theDumpFlags(dumpReq, dumpBody))
			printMsg(bodyStart, theReqBodySize);
	}

	if (buf.full()) {
		// the request does not fit in the reserved buffer
		if (ReportError(errReqDontFit)) {
			Comment << "reserved buffer capacity: " << reservedSpace << endc;
			printMsg(reqStart, theReqSize);
		}
		finish(errOther);
		return false;
	}

	return true;
}

// one transaction writes buffer under manager control
// adds the written amount to size; returns false if the transaction
// was aborted or finished
bool CltXact::controlledMasterWrite(Size &size) {
	WrBuf &buf = theConn->theWrBuf;

	// start from zero so that nothing is added to size when the buffer
	// is empty and no write is attempted
	Size sz(0);
	if (buf.contSize() > 0 && abortIo(&Connection::write, &sz))
		return false;

	if (theConn->bad()) {
		Assert(theReqSize >= 0);
		// HTTP pconn race condition?
		const bool race = buf.contSize() == theReqSize && theConn->useCnt() > 1;
		doRetry = doRetry || race;
		finish(errOther);
		return false;
	}

	size += sz;
	return true;
}

// possibly many transactions move on, after a single raw write
// takes our request size out of size if the whole request was written;
// sets needMore otherwise
bool CltXact::controlledPostWrite(Size &size, bool &needMore) {
	if (size >= theReqSize) {
		size -= theReqSize;
		needMore = false;
	} else {
		needMore = true;
	}

	if (!needMore) {
		if (theState == stSpaceWaiting && theOid.foreignUrl())
			TheEmbedStats.foreignUrlRequested++;
		newState(theState == stTunnelSpaceWaiting ?
			stTunnelRespWaiting : stHdrWaiting);
	}

	return true;
}

void CltXact::controlledAbort() {
	doRetry = true; // retry if other transactions caused this one to fail
	finish(errPipelineAbort);
}

// writes either a CONNECT or a regular request header into the write
// buffer, depending on the current state
void CltXact::makeReqHeaders() {
	WrBuf &buf = theConn->theWrBuf;
	ofixedstream os(buf.space(), buf.spaceSize());

	if (theState == stTunnelConnWaiting) {
		makeConnectReq(os);
		newState(stTunnelSpaceWaiting);
	} else
	if (theState == stConnWaiting) {
		if (theConn->sslConfigured() && !theConn->sslActive())
			theConn->sslActivate();
		makeExplicitReq(os);
		newState(stSpaceWaiting);
	}
}

// make a CONNECT request
void CltXact::makeConnectReq(ostream &os) {
	os << rlpConnect;
	makeReqHost(os);
	makeReqVersion(os);
	os << crlf;

	// the very first CONNECT request is always dumped
	static int reqCount = 0;
	finishReqHdrs(os, !reqCount++);

	// no body for CONNECT requests
}

// make a non-CONNECT request
void CltXact::makeExplicitReq(ostream &os) {
	// decide whether the request should have a body
	if (theOid.post() || theOid.put()) {
		static RndGen rng;
		// try to make sure we will have space after the headers;
		// may not work for small buffers (default 16KB is OK)
		theReqBodySize = rng(0, theConn->theWrBuf.spaceSize() / 2);
	}

	// make headers
	{
		makeReqMethod(os);
		makeEndToEndHdrs(os);
		makeHopByHopHdrs(os);

		// the very first request is always dumped
		static int reqCount = 0;
		finishReqHdrs(os, !reqCount++);
	}

	// where we should abort
	theAbortSize = theAbortCoord.pos(theReqSize, theReqBodySize);
}

// commits the headers written to os into the write buffer, lets the data
// filters add their headers, terminates the header, and sets theReqSize
void CltXact::finishReqHdrs(ostream &os, bool forceDump) {
	IOBuf &buf = theConn->theWrBuf;
	const char *reqStart = buf.space();
	buf.appended((streamoff)os.tellp());

	// give filters a chance, they may add their own headers
	TheCltDataFilterRegistry().apply(this, buf);

	buf.append("\r\n", Min(buf.spaceSize(), Size(2))); // end-of-header

	// may be adjusted in makeExplicitReq() if there is a body
	theReqSize = buf.space() - reqStart;

	// dump request header
	if (forceDump || TheOpts.theDumpFlags(dumpReq, dumpHdr))
		printMsg(reqStart, theReqSize);
}

// writes the request method prefix that matches theOid
void CltXact::makeReqMethod(ostream &os) {
	if (theOid.get())
		os << rlpGet;
	else
	if (theOid.post())
		os << rlpPost;
	else
	if (theOid.head())
		os << rlpHead;
	else
	if (theOid.put())
		os << rlpPut;
	else
		Assert(false);
}

// writes the HTTP version suffix of the request line
void CltXact::makeReqVersion(ostream &os) {
	if (theOwner->httpVersion() <= HttpVersion(1,0))
		os << rlsHttp1p0;
	else
		os << rlsHttp1p1;
}

// writes the request URI, version, and end-to-end request headers
void CltXact::makeEndToEndHdrs(ostream &os) {
	if (theOwner->proxy()) {
		// proxied connection, use full URL
		Oid2Url(theOid, os);
	} else {
		// transparent folks get an advantage of shorter request-lines
		Oid2UrlPath(theOid, os);
	}
	makeReqVersion(os);

	/* request-header fields */

	os << hfAccept;

	if (const String *codings = theOwner->cfg()->theAcceptedContentCodings)
		os << hfpAcceptEncoding << *codings << crlf;

	os << hfpHost;
	const HostCfg *host = makeReqHost(os);
	os << crlf;

	if (theOid.imsAny() && olcTimes().lmt() >= 0) {
		// one second before or after the modification time, depending
		// on whether the ims200 or the other IMS flag is set
		const Time t = theOid.ims200() ?
			(olcTimes().lmt() - Time::Sec(1)) :
			(olcTimes().lmt() + Time::Sec(1));
		HttpDatePrint(os << hfpIMS, t) << crlf;
	}

	if (theOid.reload())
		os << hfReload;

	/* entity-header fields */

	os << hfpXXact << TheGroupId << ' ' << theId << crlf;

	// send public world info
	if (!theOid.foreignUrl()) {
		// makeReqHost() returns 0 (after complaining) if the host lookup
		// failed; do not dereference a missing host
		if (host) {
			const PubWorld &pubWorld = *host->thePubWorld;
			os << hfpXLocWorld << pubWorld.localSlice() << crlf;
			if (const PubWorldSlice *slice = pubWorld.sliceToSync())
				os << hfpXRemWorld << *slice << crlf;
		}
		if (theSrvRep)
			os << hfpXTarget << theSrvRep->addr() << crlf;
	}

	os << hfpXAbort << theAbortCoord.whether() << ' ' << theAbortCoord.where() << crlf;

	// report our readiness to change phase
	os << hfpXPhaseSyncPos << TheStatPhaseMgr.phaseSyncPos() << crlf;

	if (theReqBodySize.known())
		os << "Content-length: " << (int)theReqBodySize << crlf;

	if (host && host->theCookies)
		makeCookies(os, host->theCookies);
}

// writes connection persistency and proxy authorization headers
void CltXact::makeHopByHopHdrs(ostream &os) {
	/* general-header fields */

	// persistency indication depends on HTTP version
	if (theOwner->httpVersion() <= HttpVersion(1,0)) {
		if (theConn->reusable())
			os << (theOwner->proxy() ? hfConnAlivePxy : hfConnAliveOrg);
	} else {
		if (!theConn->reusable())
			os << (theOwner->proxy() ? hfConnClosePxy : hfConnCloseOrg);
	}

	if (theOwner->credsForProxy(theOid, theCred))
		PrintBase64(os << hfpProxyAuthorization, theCred.image().data(), theCred.image().len()) << crlf;
}

// writes host:port of the request target;
// returns the host configuration for a Polygraph URL with a known host and
// 0 otherwise (foreign URL or failed host lookup)
const HostCfg *CltXact::makeReqHost(ostream &os) {
	if (theOid.foreignUrl()) {
		const char *furi = theOid.foreignUrl().cstr();
		NetAddr addr;
		if (SkipHostInUri(furi, furi+theOid.foreignUrl().len(), addr))
			os << addr.addrA() << ':' << addr.port();
		return 0;
	} else
	if (const HostCfg *host = TheHostMap->at(theOid.viserv())) {
		const NetAddr &visName = host->theAddr;
		os << visName.addrA() << ':' << visName.port();
		return host;
	}

	Should(false);
	return 0;
}

// sends kept cookies back (as many as the keep limit allows) and arranges
// for the response header to collect new ones
void CltXact::makeCookies(ostream &os, HttpCookies *cookies) {
	// configure future response parser to collect new cookies
	int keepLimit = 0;
	if (!theOwner->doCookies(keepLimit))
		return;

	cookies->keepLimit(keepLimit);
	theRepHdr.collectCookies(cookies);

	// send back all cookies we kept (if any)
	const int count = Min(keepLimit, cookies->count());
	Size accSize = 0;
	for (int i = 0; i < count; ++i) {
		const String &cookie = cookies->get(i);
		const Size cookieSize = cookie.len();
		os << hfpCookie;

		WrBuf &buf = theConn->theWrBuf;
		const Size usedSize = (streamoff)os.tellp();
		const Size spaceRemaining = buf.spaceSize() - usedSize;
		const bool fit = cookieSize <= spaceRemaining;

		if (!fit && ReportError(errCookiesDontFit)) {
			Comment << "cookie size: " << cookieSize <<
				" cookies buffered: " << i << '/' << count <<
				" or " << accSize << "; space left: " <<
				spaceRemaining << endc;
		}

		os << cookie; // note: kept cookies contain terminating CRLF;
		accSize += cookieSize;

		// the cookie that did not fit is still written above;
		// we just stop adding more
		if (!fit)
			break;
	}
}

// removes size bytes from the read buffer and counts them as received
void CltXact::consume(Size size) {
	theConn->theRdBuf.consumed(size);
	theRepSize.got(size);
}

// validates a parsed response header, updates oid flags and expected
// response size, selects the body parser, and enters stBodyWaiting;
// returns an error if the transaction should be finished with that error
Error CltXact::interpretHeader() {
	// does reply line make sense?
	if (!theRepHdr.theHttpVersion.known() || theRepHdr.theStatus < 0)
		return errHttpRLine;

	if (theSrvRep)
		theSrvRep->noteRequest(theOid);

	if (const Error err = setStatusCode(theRepHdr.theStatus))
		return err;

	// check content-length, set RepSize if possible
	if (theRepHdr.expectBody()) {
		if (theOid.head())
			theRepSize.expect(theRepHdr.theHdrSize);
		else
		if (theRepHdr.theTransferEncoding == MsgHdr::tcChunked) {
			if (theRepHdr.theContSize >= 0) // MUST ignore
				ReportError(errChunkedButCLen);
		} else
		if (theRepHdr.theContSize >= 0)
			theRepSize.expect(theRepHdr.theHdrSize + theRepHdr.theContSize);
		else
		if (theRepHdr.persistentConnection())
			return errPersistButNoCLen;
	} else {
		if (theRepHdr.theContSize >= 0)
			return errUnexpectedCLen;
		theRepSize.expect(theRepHdr.theHdrSize);
	}

	// check that we can handle transfer encoding
	if (theRepHdr.theTransferEncoding == MsgHdr::tcOther)
		return errUnknownTransferEncoding;

	theOid.repToRedir(theRepHdr.redirect());

	if (!theRepHdr.polyHeaders() && theRepHdr.expectPolyHeaders()) {
		theOid.foreignSrc(true);
		noteError(errForeignSrc);
	}

	if (theOid.foreignUrl())
		TheEmbedStats.foreignUrlReceived++;

	checkAcl();

	// firstHand here means our request reached the server and
	// server's reply reached us
	const bool firstHand = reqId().myMutant(theRepHdr.theXactId);

	theOid.cachable(theRepHdr.isCachable);
	theOid.hit(!firstHand && theRepHdr.theXactId && theOid.basic());

	if (theOid.hit()) {
		if (!theOid.offeredHit() && !TheCltOpts.ignoreFalseHits)
			noteError(errFalseHit);
		if (!theOid.cachable())
			noteError(errUnchbHit);
	}

	if (firstHand) {
		// native miss
		checkDateSync();
		firstHandSync();
	} else
	if (theRepHdr.theXactId) {
		// native second-hand response
		if (theOid.reload())
			noteError(errReloadHit);
	} else
	if (theRepHdr.theDate >= 0 && theRepHdr.theDate < HttpDateAtMost(theStartTime)) {
		// our response is aged: it was generated before we asked for it
		if (theOid.reload())
			noteError(errReloadHit);
	}

	if (theOid.offeredHit() && !theOid.hit())
		noteError(errFalseMiss);

	checkFreshness();

	// note if the server will close the connection
	if (!theRepHdr.persistentConnection()) {
		theConn->lastUse(true);
		theMgr->noteLastXaction(this);
	}

	theBodyParser = selectBodyParser();
	newState(stBodyWaiting);
	return 0;
}

// reports a non-fatal response problem unless it is one of the cases
// that should be ignored; does not finish the transaction
void CltXact::noteError(const Error &err) {
	if (err == errFalseMiss) {
		if (TheCltOpts.printFalseMisses) {
			Oid2Url(theOid, cout << "False-Miss: ");
			cout << endl;
			printMsg(theConn->theRdBuf, theRepHdr.theHdrSize);
		}
		return; // do not report false misses; they may not be errors
	}

	if (err == errForeignSrc) {
		if (theOid.foreignUrl())
			return; // response to a foreign URL may be foreign

		static bool informed = false;
		if (!informed) {
			Comment(5) << "fyi: received first foreign response to " <<
				"Polygraph-specific URL:" << endc;
			printMsg(theConn->theRdBuf);
			informed = true;
		}

		if (theHttpStatus == RepHdr::sc200_OK && theOwner->cfg()->acl())
			return; // let checkAcl() handle this case

		if (TheOpts.acceptForeignMsgs)
			return; // the user told us to accept all foreign responses
	}

	if (!ReportError(err))
		return;

	/* supply additional information */

	if (err == errStaleHit || err == errReloadHit) {
		HttpDatePrint(cout << "\trep: ", theRepHdr.theDate) << endl;
		HttpDatePrint(cout << "\tlmt: ", olcTimes().lmt()) << endl;
		HttpDatePrint(cout << "\treq: ", theStartTime) << endl;
		HttpDatePrint(cout << "\tnow: " ) << endl;
		HttpDatePrint(cout << "\texp: ", olcTimes().exp()) << endl;
	}

	if (TheOpts.theDumpFlags(dumpErr, dumpAny))
		printMsg(theConn->theRdBuf);
}

void CltXact::noteCredentialsGone() {
	theCred.reset();
}

// keeps a copy of the response header bytes in theSavedRepHeader
void CltXact::saveRepHeader() {
	theConn->theRdBuf.copyContent(theSavedRepHeader, theRepHdr.theHdrSize);
}

// uses the headers of a first-hand response to update public world,
// server, and phase synchronization information
void CltXact::firstHandSync() {
	// update public world info
	if (theRepHdr.theRemWorld)
		updatePubWorld(theRepHdr.theRemWorld);

	// sync oid accounting on first-hand replies
	if (theSrvRep)
		theSrvRep->syncSrvInfo(theRepHdr);

	// sync phases
	if (theRepHdr.thePhaseSyncPos >= 0 && theRepHdr.theGroupId)
		TheStatPhaseSync.notePhaseSync(theRepHdr.theGroupId,
			theRepHdr.thePhaseSyncPos);
}

// compares the proxy authentication outcome with the credentials we sent;
// needed is true if the proxy demanded authentication (407)
void CltXact::authWithProxy(bool needed) {
	bool reportErrDetails = false;

	if (needed) { // denied
		if (!theCred.image()) { // sent nothing
			doRetry = true;
			theOwner->noteProxyAuthReq(this);
		} else
		if (theCred.valid()) { // sent valid
			reportErrDetails = ReportError(errProxyAuthDenied);
		}
	} else { // allowed
		if (theCred.image() && !theCred.valid()) // sent invalid
			reportErrDetails = ReportError(errProxyAuthAllowed);
	}

	if (reportErrDetails) {
		Comment << "robot: " << theOwner->host()
			<< " credentials: " << theCred.image()
			<< " proxy: " << theOwner->proxy()
			<< endc;
	}
}

// tells the owner to follow the redirect location of the response,
// unless the location is invalid
void CltXact::redirect() {
	ObjId dest = theRepHdr.theLocn.oid;
	dest.rediredReq(true);
	dest.get(true);

	if (!dest.foreignUrl()) {
		if (!validRelOid(dest)) {
			ReportError(errRedirLocation);
			return;
		}

		if (theRepHdr.theLocn.host) {
			if (const Error err = setViserv(theRepHdr.theLocn.host, dest)) {
				noteError(err);
				return;
			}
		} else {
			dest.viserv(theOid.viserv());
		}

		theOwner->selectTarget(dest);
		dest.repeat(true); // polysrv only redirects to seen URLs
	} else {
		dest.repeat(false); // XXX: may be a repeat, we do not know
	}

	// XXX: hot() is unset

	theOwner->noteRedirect(this, dest);
}

// selects the content parser and wraps it into a chunked coding parser
// if the response is chunked
BodyParser *CltXact::selectBodyParser() {
	BodyParser *cparser = selectContentParser();

	if (theRepHdr.chunkedEncoding())
		return ChunkedCodingParser::GetOne(this, cparser);

	return cparser;
}

// decides whether the response content should be parsed as markup
BodyParser *CltXact::selectContentParser() {
	// parse if content is markup
	if (theRepHdr.markupContent())
		return selectMarkupBodyParser();

	// if content is unknown, use URL extension (if any) to guess type
	if (!theRepHdr.knownContentType() && OidImpliesMarkup(theOid, theContentCfg))
		return selectMarkupBodyParser();

	// parse if domestic content may have embedded tags
	if (!theOid.foreignUrl() && theContentCfg->hasEmbedCont())
		return selectMarkupBodyParser();

	// do not parse otherwise
	return AnyBodyParser::GetOne(this);
}

// returns a markup parser, starting a new page if we are not on one
BodyParser *CltXact::selectMarkupBodyParser() {
	if (!thePage) {
		thePage = PageInfo::Create();
		thePage->start = theStartTime;
	}

	if (theOwner->cfg()->followAllUris(theRepHdr))
		return UriScriptBodyParser::GetOne(this, theOwner->cfg());

	return MarkupBodyParser::GetOne(this, theOwner->cfg());
}

// whether we should wait for more response bytes
bool CltXact::expectMore() const {
	if (theConn->atEof())
		return false;

	if (!theRepSize.expected().known())
		return true;

	return theRepSize.expected() > theRepSize.actual() + unconsumed();
}

// how much of the read buffer content theRepSize expects us to take
Size CltXact::unconsumed() const {
	return theRepSize.expectToGet(theConn->theRdBuf.contSize());
}

// object life cycle times, calculated on demand
const ObjTimes &CltXact::olcTimes() const {
	// not calculated or not configured, but can be
	if (theOlcTimes.lmt() < 0 && theContentCfg)
		theContentCfg->calcTimes(theOid, theOlcTimes);
	return theOlcTimes;
}

// whether oid has a world, a positive name, and a valid content type
bool CltXact::validRelOid(const ObjId &oid) const {
	return oid.world() && oid.name() > 0 &&
		TheContentMgr.validId(oid.type());
}

// checks that the response status agrees with the configured access rules
void CltXact::checkAcl() {
	const AclGroup &acl = theOwner->cfg()->acl();
	if (!acl) // no access controls configured
		return;

	if (!acl.needsCheck(theOid.foreignUrl()))
		return;

	if (RepHdr::PositiveStatusCode(theHttpStatus)) {
		if (!theOid.foreignUrl() && theOid.foreignSrc())
			checkAclMatch(acl.rewrite(), "rewritten");
		else
			checkAclMatch(acl.allow(), "allowed");
		return;
	}

	if (theHttpStatus == RepHdr::sc407_ProxyAuthRequired) {
		// do not check acls if we did not authenticate yet but can
		if (!theCred.image() && theOwner->hasCredentials())
			return;
		// do not check acls if we sent invalid credentials
		if (theCred.image() && !theCred.valid())
			return;
	}

	checkAclMatch(acl.deny(), "denied");
}

// reports an error unless the request matches exactly one rule group
// and that group is matchedGrp
void CltXact::checkAclMatch(const RegExGroup *matchedGrp, const char *action) {
	RegExMatchee m;
	buildAclMatchee(m);

	// NOTE(review): Array appears without template arguments in this copy
	// (here and in explainAclMatch()); presumably lost -- confirm.
	static Array matches;
	matches.reset();
	theOwner->cfg()->acl().match(m, matches);

	bool explainMatch = false;
	if (matches.count() == 0)
		explainMatch = ReportError(errAclNoMatches);
	else
	if (matches.count() == 1 && matches.last() != matchedGrp)
		explainMatch = ReportError(errAclWrongMatch);
	else
	if (matches.count() > 1)
		explainMatch = ReportError(errAclManyMatches);

	if (explainMatch)
		explainAclMatch(m, action, matches);
}

// fills m with the URL host, URL path, complete URL, user name, and
// memberships to match; URL strings live in a static buffer and are
// overwritten by the next call
void CltXact::buildAclMatchee(RegExMatchee &m) const {
	// form complete URL
	static WrBuf buf;
	buf.reset();
	ofixedstream os(buf.space(), buf.spaceSize()-Size(1));

	m.urlHost = buf.space() + 0; // URL host alone
	Oid2UrlHost(theOid, os);
	Should(os << ends);

	m.urlPath = buf.space() + os.tellp(); // URL path alone
	Oid2UrlPath(theOid, os);
	Should(os << ends);

	m.url = buf.space() + os.tellp(); // complete URL
	Oid2Url(theOid, os);
	Should(os << ends);

	buf.appended(Size(os.tellp()));
	buf.append("", 1); // terminate even if stream is full

	m.userName = theCred.image().cstr();
	m.memberships = &theOwner->memberships();
}

// logs details of an unexpected ACL match outcome
void CltXact::explainAclMatch(const RegExMatchee &m, const char *action, const Array &ourMatches) const {
	ostream &os = Comment(6) << "URL: " << m.url << endl;

	if (theCred.valid())
		os << "\tuser: " << theCred.image() << endl;

	os << "\tmembership maps: " << m.memberships->count() << endl;
	os << "\tgroups: ";
	for (int g = 0; g < m.memberships->count(); ++g) {
		if (g)
			os << "; ";
		dumpMatchingGroupNames(os, m.memberships->item(g));
		// do not list too many maps
		if (g >= 10) {
			os << "; ...";
			break;
		}
	}
	os << endl;

	os << "\twas probably " << action << " but matches " <<
		ourMatches.count() << " rule(s)";
	for (int i = 0; i < ourMatches.count(); ++i) {
		os << (i == 0 ? ": " : ", ");
		os << theOwner->cfg()->acl().ruleName(ourMatches[i]);
	}
	os << endc;
}

// prints the first few group names that map yields for our credentials
void CltXact::dumpMatchingGroupNames(ostream &os, const MembershipMap *map) const {
	MembershipMap::GroupIterator i = map->groupIterator(theCred);
	for (int count = 0; i && count <= 5; ++count, ++i) {
		if (count)
			os << ", ";
		os << *i;
	}
}

// notes a stale hit if the response is older than the object
void CltXact::checkFreshness() {
	// cannot check without the date header
	if (theRepHdr.theDate < 0)
		return;

	// skip in-transit modifications to avoid false errors due to racing
	if (HttpDateAtMost(theStartTime) <= olcTimes().lmt())
		return;

	// response is stale if it was generated before modification time
	if (theRepHdr.theDate < olcTimes().lmt())
		noteError(errStaleHit);
}

// whether the response header announced an abort and we have already
// received at least as many bytes as the announced abort position
bool CltXact::cfgAbortedReply() const {
	Assert(theRepSize.expected().known());

	if (!theRepHdr.theAbortCoord)
		return false;

	const Size abSz = theRepHdr.theAbortCoord.pos(theRepHdr.theHdrSize,
		theRepSize.expected()-theRepHdr.theHdrSize);
	if (abSz < 0)
		return false;

	/* expecting abort, sooner or later */

	// we may have leftovers if we were parsing aborted content
	if (abSz <= theRepSize.actual() + unconsumed())
		return true; // reached abort level

	return false;
}

// remembers the response status; returns an error unless the status is
// 200, 304, 403, 407, or the response is a redirect
Error CltXact::setStatusCode(int aStatus) {
	theHttpStatus = aStatus;

	if (theHttpStatus != RepHdr::sc200_OK &&
		theHttpStatus != RepHdr::sc304_NotModified &&
		theHttpStatus != RepHdr::sc403_Forbidden &&
		theHttpStatus != RepHdr::sc407_ProxyAuthRequired &&
		!theRepHdr.redirect())
		return errHttpStatusCode;

	return Error();
}

// complains if the response has no date or if its date differs from
// our clock by more than maxGap
void CltXact::checkDateSync() {
	if (theRepHdr.theDate < 0) {
		ReportError(errHttpNoDate);
		return;
	}

	const Time maxGap = Time::Sec(60);

	// cannot measure drift using replies that took too long
	if (TheClock - theStartTime >= maxGap/2)
		return;

	if (theRepHdr.theDate > TheClock.time() + maxGap ||
		theRepHdr.theDate < TheClock.time() - maxGap) {
		if (ReportError(errSyncDate)) {
			HttpDatePrint(Comment << "request generated:  ", theStartTime) << endc;
			HttpDatePrint(Comment << "response generated: ", theRepHdr.theDate) << endc;
			HttpDatePrint(Comment << "response received:  ", TheClock.time()) << endc;
			const Time diff = theRepHdr.theDate - TheClock.time();
			Comment << "time difference: " << diff << endc;
		}
	}
}

// updates the matching slice of our host's public world or adds a new one
void CltXact::updatePubWorld(const ObjWorld &slice) {
	PubWorld &pubWorld = *TheHostMap->at(theOid.viserv())->thePubWorld;
	int sliceIdx = -1;
	if (pubWorld.find(slice.id(), sliceIdx))
		pubWorld.sliceAt(sliceIdx).update(slice);
	else
		pubWorld.add(slice);
}

// called by BodyParsers, must not finish()
// feeds content to the checksum algorithm if the response has a checksum
void CltXact::noteContent(const ParseBuffer &content) {
	if (theRepHdr.theChecksum.set())
		theCheckAlg.update(content.data(), content.size());
}

// called by BodyParsers, must not finish()
// resolves an embedded URL and, if it is usable, passes its oid to the owner
Error CltXact::noteEmbedded(ReqHdr &hdr) {
	TheEmbedStats.urlSeen++;

	const Error err = hdr.theUri.oid.foreignUrl() ?
		handleForeignEmbedOid(hdr) : handleEmbedOid(hdr);

	if (!err) {
		// "fix" new oid record: the embedded oid gets these flags from ours
		hdr.theUri.oid.repeat(theOid.repeat());
		hdr.theUri.oid.hot(theOid.hot());
		hdr.theUri.oid.ims200(theOid.ims200());
		hdr.theUri.oid.ims304(theOid.ims304());
		hdr.theUri.oid.reload(theOid.reload());
		hdr.theUri.oid.get(true);
		theOwner->noteEmbedded(this, hdr.theUri.oid);
	}

	return err;
}

// called by BodyParsers, must not finish()
void CltXact::noteTrailerHeader(const ParseBuffer &hdr) {
	if (ReportError(errTrailerHeader))
		printMsg(hdr.data(), hdr.size());
}

// called by BodyParsers, must not finish()
void CltXact::noteEndOfTrailer() {
	Should(!theRepSize.expectingWhatParsed());
	theRepSize.expectingWhatParsed(true);
}

// sets the visible server and target for an embedded Polygraph URL
Error CltXact::handleEmbedOid(ReqHdr &hdr) {
	Error err;
	if (validRelOid(hdr.theUri.oid)) {
		if (hdr.theUri.host)
			err = setViserv(hdr.theUri.host, hdr.theUri.oid);
		else
			hdr.theUri.oid.viserv(theOid.viserv()); // relative URL

		if (!err)
			theOwner->selectTarget(hdr.theUri.oid);
	} else {
		err = errBadEmbedUri;
	}
	return err;
}

// makes a host-less embedded foreign URL absolute, using the address
// of our visible server
Error CltXact::handleForeignEmbedOid(ReqHdr &hdr) {
	Error err;

	//if (strncmp(hdr.theUri.pathBuf, "/cgi-bin/cntmgr.pl", 18) == 0)
	//	TheEmbedStats.scriptMgrUrlSeen++;

	if (!hdr.theUri.host) {
		// must set host name for Client to know where to send requests
		hdr.theUri.oid.viserv(theOid.viserv());
		const NetAddr &visName = TheHostMap->at(theOid.viserv())->theAddr;

		char buf[4*1024];
		ofixedstream os(buf, sizeof(buf)-1);
		os << "http://" << visName.addrA() << ':' << visName.port()
			<< hdr.theUri.oid.foreignUrl() << ends;
		buf[sizeof(buf)-1] = '\0'; // terminate even if the URL was cut short
		hdr.theUri.oid.foreignUrl(buf);
	}

	//if (TheEmbedStats.scriptUrlSeen % 1000 == 0) {
	//	(clog << here << "tag url: ").write(hdr.theUri.pathBuf, hdr.theUri.pathLen);
	//	clog << endl;
	//}

	// hdr.theUri.oid is already set
	return err;
}

// sets oid's visible server to the one named name;
// fails if the name is unknown or differs from our own visible server
Error CltXact::setViserv(const NetAddr &name, ObjId &oid) const {
	// XXX: merge with SrvXact::setViserv() ?
	int viserv = -1;
	if (!TheHostMap->find(name, viserv))
		return errForeignHostName;

	if (viserv != theOid.viserv())
		return errSrvRedirect; // disallow for now

	oid.viserv(viserv);
	return Error();
}

// logs the owner's seqvId() and the server index (-1 if there is no
// server representative) after the base class statistics
void CltXact::logStats(OLog &ol) const {
	Xaction::logStats(ol);
	ol << theOwner->seqvId();
	ol << (theSrvRep ? theSrvRep->serverIdx() : -1);
}