/* Web Polygraph http://www.web-polygraph.org/
* (C) 2003-2006 The Measurement Factory
* Licensed under the Apache License, Version 2.0 */
#include "base/polygraph.h"
#include "xstd/h/iostream.h"
#include <fstream>
#include <ctype.h>
#include "xstd/NetAddr.h"
#include "base/AddrParsers.h"
#include "base/ForeignTrace.h"
ForeignTrace::ForeignTrace(): doIgnoreBad(true) {
}
void ForeignTrace::configure(const String &aName, bool ignoreBad) {
theName = aName;
doIgnoreBad = ignoreBad;
}
int ForeignTrace::gatherUrls(Array<String*> &urls) const {
ifstream is(theName.cstr());
open(is);
while (String *url = getUrl(is)) {
urls.append(url);
theMemSize += url->len(); // content
theMemSize += SizeOf(String); // overhead
theMemSize += SizeOf(String*); // overhead
}
close(is, urls.count());
return urls.count();
}
int ForeignTrace::gatherHosts(Array<NetAddr*> &hosts) const {
ifstream is(theName.cstr());
open(is);
while (String *url = getUrl(is)) {
// extract host from the URL
NetAddr host;
const char *urlB = url->cstr();
const char *urlE = urlB + url->len();
if (const char *hostEnd = SkipHostInUri(urlB, urlE, host)) {
hosts.append(new NetAddr(host));
theMemSize += hostEnd - urlB; // content
theMemSize += SizeOf(NetAddr); // overhead
theMemSize += SizeOf(NetAddr*); // overhead
}
delete url;
}
close(is, hosts.count());
return hosts.count();
}
String *ForeignTrace::getUrl(istream &is) const {
char line[16*1024];
while (is.good() && is.getline(line, sizeof(line))) {
// delete comments
if (char *comment = strchr(line, '#'))
*comment = '\0';
// find first URL on the line
if (const char *urlB = strstr(line, "http://")) {
// find URL end (white space or eol)
const char *urlE = urlB + 7;
while (*urlE && !isspace(*urlE))
++urlE;
String *url = new String;
url->append(urlB, urlE - urlB);
++theEntryCount;
return url;
}
// skip leading spaces to avoid warning about empty lines
const char *urlB = line;
while (*urlB && isspace(*urlB))
++urlB;
if (*urlB && !doIgnoreBad) {
cerr << here <<
"error: all trace URLs must follow " <<
"http://host/path format; " << endl <<
"\tfound: " << urlB << endl;
}
}
return 0;
}
void ForeignTrace::open(istream &is) const {
if (is.bad()) {
cerr << "failed to open '" << theName << "' trace for reading: " <<
Error::Last() << endl;
}
theMemSize = 0;
theEntryCount = 0;
}
void ForeignTrace::close(istream &is, int goodCount) const {
if (goodCount > 0)
clog << "fyi: loaded trace from ";
else
clog << "warning: empty trace in ";
clog << "'" << theName << "': " <<
"used " << goodCount << " entries out of " << theEntryCount << ", " <<
"spent at least " << theMemSize << " bytes" <<
endl;
}
syntax highlighted by Code2HTML, v. 0.9.1