/* Web Polygraph       http://www.web-polygraph.org/
 * (C) 2003-2006 The Measurement Factory
 * Licensed under the Apache License, Version 2.0 */

// NOTE(review): this copy of the file shows signs of text loss (apparently
// everything that sat between a '<' and a later '>' was dropped). The visibly
// affected places are flagged below and deliberately kept verbatim rather
// than reconstructed from guesswork; restore them from the upstream source.

#include "base/polygraph.h"

// NOTE(review): the header name of the following directive is missing.
#include

#include "xstd/Rnd.h"
#include "xstd/gadgets.h"
#include "runtime/LogComment.h"
#include "runtime/ErrorMgr.h"
#include "runtime/polyErrors.h"
#include "client/CltXact.h"
#include "client/CltOpts.h"
#include "client/ParseBuffer.h"
#include "client/UriScriptBodyParser.h"

// commonly used RE patterns
static const String ptnConst = "'([^']*)'";       // single-quoted constant; captures the text between the quotes
static const String ptnName = "([[:alnum:]_]+)";  // captures a run of alphanumerics/underscores
static const String ptnSpace = "[[:space:]]*";    // optional whitespace
static const String ptnEq = ptnSpace + "=" + ptnSpace; // '=' with optional whitespace on both sides

// one statically shared regular expression per script variable: w, x, y, z
RegEx UriScriptBodyParser::TheWre;
RegEx UriScriptBodyParser::TheXre;
RegEx UriScriptBodyParser::TheYre;
RegEx UriScriptBodyParser::TheZre;

// farm that hands out parser objects of this class (see GetOne() and farm())
// NOTE(review): template arguments may have been lost on this line -- verify.
BodyParserFarmT UriScriptBodyParser::TheParsers;

// Obtains a parser from the shared farm and configures it for the given
// transaction (anOwner) and client configuration (aCfg).
// If the farm reports zero capacity, its limit is set to 1024 first.
BodyParser *UriScriptBodyParser::GetOne(CltXact *anOwner, const CltCfg *aCfg) {
    if (!TheParsers.capacity())
        TheParsers.limit(1024);
    UriScriptBodyParser *parser = TheParsers.getTyped();
    parser->configure(anOwner, aCfg);
    return parser;
}

// Configures the four shared per-variable REs unless TheXre is already
// configured (TheXre serves as the "all four are done" indicator), then
// puts this object into its initial state.
UriScriptBodyParser::UriScriptBodyParser() {
    // configure static REs if needed
    if (!TheXre.configured()) {
        ConfigureScriptVarRe(TheWre, "w");
        ConfigureScriptVarRe(TheXre, "x");
        ConfigureScriptVarRe(TheYre, "y");
        ConfigureScriptVarRe(TheZre, "z");
    }
    resetSelf();
}

// Resets this class's own fields first, then the BodyParser base.
void UriScriptBodyParser::reset() {
    resetSelf();
    BodyParser::reset();
}

// Returns the fields owned by this class to their initial values:
// no configuration, no script boundaries, and the stNone state.
void UriScriptBodyParser::resetSelf() {
    theCfg = 0;
    theScriptBeg = theScriptEnd = 0;
    theState = stNone;
}

// Configures the base class with the owner transaction and stores aCfg.
// Check()s that no configuration is currently set and that aCfg is non-null.
void UriScriptBodyParser::configure(CltXact *anOwner, const CltCfg *aCfg) {
    BodyParser::configure(anOwner);
    Check(!theCfg && aCfg);
    theCfg = aCfg;
}

// Returns the farm shared by all parsers of this class.
BodyParserFarm &UriScriptBodyParser::farm() const {
    return TheParsers;
}

// Advances the parsing state machine over buf.
// The checks below are independent ifs (not an else-if chain), so a single
// call can move through several states when a step changes theState.
// Returns the result of parseAny(buf) once the stDone state is reached and
// 0 otherwise.
Size UriScriptBodyParser::parse(const ParseBuffer &buf) {
    if (theState == stNone)
        theState = stOpen;

    if (theState == stOpen)
        parseOpen(buf);

    if (theState == stClose)
        parseClose(buf);

    if (theState == stBody)
        parseBody(buf);

    if (theState == stDone)
        return parseAny(buf);

    return 0; // need more data
}

// NOTE(review): the text of this file is damaged from here on: the remainder
// of parseOpen() and whatever followed it is missing, and what is left runs
// straight into the tail of a routine that extracts a matched value and
// reports undefined/malformed script variables. The surviving text is kept
// exactly as found.
void UriScriptBodyParser::parseOpen(const ParseBuffer &buf) {
    static const String pfx = "= 0 && m.rm_eo > m.rm_so) {
            value = String(theScriptBeg + m.rm_so, m.rm_eo - m.rm_so);
            return true;
        }
    }

    if (name) { // client asked to report errors
        const Error &err = errForeignTag;
        if (!TheCltOpts.ignoreBadContTags && ReportError(err)) {
            dumpContext(Comment << "undefined or malformed variable '" << name << "' in the following emdedded script", theScriptBeg, theScriptEnd - theScriptBeg) << endc;
        }
    }

    return false;
}

// Parses uri into a request header and, on success, passes that header to
// the owner transaction via noteEmbedded(); a URI that fails to parse is
// treated as errForeignTag.
// A resulting error is reported with the offending URI as context unless
// TheCltOpts.ignoreBadContTags is set. The first URI that goes through
// without an error is logged once as an informational comment.
void UriScriptBodyParser::forwardUri(const String &uri) {
    Error err;
    const char *parsep = uri.cstr();
    ReqHdr hdr;
    if (hdr.parseUri(parsep, parsep+uri.len(), hdr.theUri))
        err = theOwner->noteEmbedded(hdr);
    else
        err = errForeignTag;

    if (err) {
        if (!TheCltOpts.ignoreBadContTags && ReportError(err)) {
            dumpContext(Comment << "unparseable recreated URL: ", uri.data(), uri.len()) << endc;
        }
    } else {
        // A boolean latch rather than an ever-growing signed counter: the
        // counter would eventually overflow (undefined behavior) and, if it
        // wrapped to zero, repeat the "first URL" message.
        static bool reportedFirst = false;
        if (!reportedFirst) {
            reportedFirst = true;
            dumpContext(Comment << "fyi: first URL extracted " << "from a script: ", uri.data(), uri.len()) << endc;
        }
    }
}

// Hands the entire buffer to the owner transaction via noteContent() and
// reports all of it as parsed. The size is read before noteContent() runs.
Size UriScriptBodyParser::parseAny(const ParseBuffer &buf) {
    const Size parsedSize = buf.size();
    theOwner->noteContent(buf);
    return parsedSize;
}

// Called with data that was left unparsed; only the stOpen and stClose
// states are expected here (see Should()). An error is reported only in
// the stClose state.
// NOTE(review): this definition continues beyond the visible text.
void UriScriptBodyParser::noteLeftovers(const ParseBuffer &leftovers) {
    Should(theState == stOpen || theState == stClose);
    if (theState == stClose && ReportError(errContentLeftovers)) {
        // the guard above requires stClose, so this always selects "end"
        const char *what = theState == stOpen ? "beginning" : "end";
        dumpContext(Comment << "unable to locate the " << what << " of a URI-setting embedded