/* Web Polygraph http://www.web-polygraph.org/
* (C) 2003-2006 The Measurement Factory
* Licensed under the Apache License, Version 2.0 */
#include "base/polygraph.h"
#include "xstd/gadgets.h"
#include "base/StatIntvlRec.h"
#include "runtime/LogComment.h"
#include "runtime/ErrorMgr.h"
#include "runtime/polyErrors.h"
#include "csm/XmlParser.h"
#include "csm/XmlTagParser.h"
#include "csm/XmlTagIdentifier.h"
#include "client/CltXact.h"
#include "client/CltOpts.h"
#include "client/CltCfg.h"
#include "client/ParseBuffer.h"
#include "client/MarkupBodyParser.h"
// Class-wide farm of MarkupBodyParser objects: GetOne() obtains parsers from
// it and farm() exposes it to callers as a BodyParserFarm.
BodyParserFarmT<MarkupBodyParser> MarkupBodyParser::TheParsers;
// Obtains a parser from the class-wide farm, configures it for the given
// owner and client configuration, and returns it as a generic BodyParser.
BodyParser *MarkupBodyParser::GetOne(CltXact *anOwner, const CltCfg *aCfg) {
	// the farm limit is set lazily, the first time its capacity reads as zero
	if (!TheParsers.capacity())
		TheParsers.limit(1024);

	MarkupBodyParser *const fresh = TheParsers.getTyped();
	fresh->configure(anOwner, aCfg);
	return fresh;
}
// A new parser starts without a client configuration (theCfg is null);
// configure() supplies one later.
MarkupBodyParser::MarkupBodyParser(): theCfg(0) {
}
void MarkupBodyParser::reset() {
resetSelf();
BodyParser::reset();
}
// Clears only the state owned by this class: forgets the client
// configuration so that configure() can be called again.
void MarkupBodyParser::resetSelf() {
theCfg = 0;
}
// Prepares the parser for use. anOwner is forwarded to
// BodyParser::configure(); aCfg is remembered in theCfg and is later
// consulted for the set of container tags.
void MarkupBodyParser::configure(CltXact *anOwner, const CltCfg *aCfg) {
BodyParser::configure(anOwner);
// expected state: not configured yet, and a configuration was supplied
Check(!theCfg && aCfg);
theCfg = aCfg;
}
// Returns the class-wide farm (TheParsers) that GetOne() draws parsers from.
BodyParserFarm &MarkupBodyParser::farm() const {
return TheParsers;
}
// Scans buf for markup tags and hands each tag image to parseTag().
// The consumed prefix of buf is reported to the owner as content.
// Returns the number of bytes consumed: all of buf when no container tags
// are configured, otherwise the offset at which the scanner's tail begins.
Size MarkupBodyParser::parse(const ParseBuffer &buf) {
	// nothing to follow: skip tag scanning and pass the buffer through
	if (!theCfg->theContainerTags->tagCount()) {
		theOwner->noteContent(buf);
		return buf.size();
	}

	// static: a single scanner object is reused by every call
	static XmlParser scanner;
	(void)scanner.parse(buf.data(), buf.data() + buf.size());

	int nodeIdx = 0;
	while (nodeIdx < scanner.nodeCount()) {
		const XmlParser::Node &current = scanner.node(nodeIdx);
		if (current.tag()) {
			const char *const tagStart = current.imageBeg;
			parseTag(tagStart, tagStart + current.imageLen);
		}
		++nodeIdx;
	}

	// everything before the scanner's tail counts as parsed
	const Size parsedSize = scanner.tail().imageBeg - buf.data();
	theOwner->noteContent(buf.head(parsedSize));
	return parsedSize;
}
// Handles data that was left unparsed: optionally reports an
// errContentLeftovers error with the surrounding context, then passes the
// leftovers to the owner as content.
void MarkupBodyParser::noteLeftovers(const ParseBuffer &leftovers) {
	// reporting is skipped entirely when bad container tags are ignored
	if (!TheCltOpts.ignoreBadContTags) {
		if (ReportError(errContentLeftovers)) {
			dumpContext(Comment << "open markup tag near ",
				leftovers.data(), leftovers.size()) << endc;
		}
	}

	theOwner->noteContent(leftovers);
}
// Handles an overflowing buffer: optionally reports an errHugeContentToken
// error with the surrounding context, then passes the buffer to the owner
// as content.
void MarkupBodyParser::noteOverflow(const ParseBuffer &buf) {
	// reporting is skipped entirely when bad container tags are ignored
	if (!TheCltOpts.ignoreBadContTags) {
		if (ReportError(errHugeContentToken)) {
			dumpContext(Comment << "huge markup tag near ",
				buf.data(), buf.size()) << endc;
		}
	}

	theOwner->noteContent(buf);
}
void MarkupBodyParser::parseTag(const char *tagB, const char *tagE) {
TheEmbedStats.tagSeen++;
// TheEmbedStats.scriptSeen++;
// static to avoid allocating array-of-tokens all the time
static XmlTagParser tagParser;
Error err;
int tagIdx = -1;
const XmlTagIdentifier &map = *theCfg->theContainerTags;
if (tagParser.parseTagname(tagB+1, tagE-1)) {
const XmlTagParser::Token &tagname = tagParser.tagname();
if (!map.findTag(tagname.nameBeg, tagname.nameLen, tagIdx))
return; // not an error
tagParser.parseRest();
TheEmbedStats.tagMatched++;
} else {
err = tagParser.error();
Assert(err);
}
// iterate over attribute tokens within a tag
for (int i = 0; !err && i < tagParser.attrCount(); ++i) {
TheEmbedStats.attrSeen++;
const XmlTagParser::Token &attr = tagParser.attr(i);
if (!map.findAttr(tagIdx, attr.nameBeg, attr.nameLen))
continue;
const char *url = attr.valBeg;
// ignore empty or local references
if (attr.valLen <= 0 || *url == '#')
continue;
TheEmbedStats.attrMatched++;
ReqHdr hdr;
if (hdr.parseUri(url, url + attr.valLen, hdr.theUri))
err = theOwner->noteEmbedded(hdr);
else
err = errForeignTag;
}
if (err && !TheCltOpts.ignoreBadContTags && ReportError(err))
dumpContext(Comment << "offending markup tag: ", tagB, tagE - tagB) << endc;
}
// syntax highlighted by Code2HTML, v. 0.9.1