/*
http_active
Copyright July 5, 2001, The University of North Carolina at Chapel Hill
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Contact person:
Frank D. Smith, University of North Carolina at Chapel Hill
email: smithfd@cs.unc.edu
phone: 919-962-1884
fax: 919-962-1799
*/
/* Program to create an activity trace (summary form) of web browsing clients
with respect to three types of activity: client sending request data,
server sending response data, client is idle (no request or response).
Identification of idle periods is used to infer user "think" times
between requests for new top-level pages. A client is defined by
a single IP address.
"Idle" is defined as a period of time greater than a threshold value
("idle_limit" with a default of 2 seconds) during which a client has no
requests outstanding. A request is outstanding from the
start time of a request until the end time (normal or terminated) of
the corresponding response.
The input to this program is the SORTed output from http_connect.
The sort to be applied is produced with the following shell script:
sort -s -o $1.sort +1 -2 +0 -1 -T /tmp $1
This sorts all the records for a given client IP address in timestamp
order.
The output is also time ordered with respect to a single client (IP
address) and consists only of client request entries (in the same format
as the input) and ordered by start time, server responses (in the same
format as the input) and ordered by end time, and client idle periods
giving the elapsed idle time and ordered by the end of the idle period.
The output file has extension ".activity" added by the program.
To get usage information, invoke the program with the -h switch.
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
/* Smaller / larger of two values.  These are plain macros, so the argument
   that is selected is evaluated twice: do not pass expressions with side
   effects (for example min(i++, j)). */
#define min(a,b) ((a) <= (b) ? (a) : (b))
#define max(a,b) ((a) >= (b) ? (a) : (b))
/* Print the command-line synopsis on stderr, using `s` as the program name,
   and terminate the process with exit status -1.  This function does not
   return to its caller. */
void Usage(char *s)
{
    static const char *const option_help[] = {
        " [-w file_name] (name for output file)\n",
        " [-r file_name] (name for input file)\n",
        " [-I idle_limit] (min inactivity interval)\n",
        "\n"
    };
    size_t k;

    fprintf(stderr, "\nUsage: %s\n", s);
    for (k = 0; k < sizeof(option_help) / sizeof(option_help[0]); k++)
        fputs(option_help[k], stderr);
    exit(-1);
}
/* Input stream (stdin or the -r file) and output stream (stdout or the -w
   file); both are opened in main. */
FILE *dumpFP, *outFP;

/* NOTE(review): time_stamp, req and rsp are not referenced anywhere in the
   visible source; kept in case other code depends on them. */
struct timeval time_stamp = {0,0};
int req;
int rsp;

/* Whitespace-separated leading fields of the input record currently being
   processed; main fills them in for every line it reads. */
char ts[20];   /* record timestamp text ("seconds.fraction")              */
char sh[25];   /* source host: identifies the client                      */
char sp[10];   /* source port                                             */
char gt[3];    /* the ">" separator between source and destination        */
char dh[25];   /* destination host                                        */
char dp[10];   /* destination port                                        */
/* Record flag (SYN, REQ, RSP, FIN, ...).  The longest flags compared
   against in main are "ACT-REQ"/"ACT-RSP", which need 8 bytes including
   the terminator, so the former 5-byte buffer was overrun whenever one of
   them was read. */
char fl[16];

/* Source host of the client whose records are currently being processed. */
char current_src[25] = "";

/* Activity state of the current client; see the state description in main. */
enum client_states
    {PENDING_ACTIVE, ACTIVE, PENDING_IDLE, IDLE};
enum client_states client_state = PENDING_ACTIVE;

/* Classification of an input record, derived from its flag field. */
enum event_types {SYN, ACT_REQ, ACT_RSP, END, REQ, RSP};
enum event_types event_type;

char idle_begin[20];      /* timestamp at which the current idle period began  */
char earliest_end[20];    /* end timestamp taken from a FIN/RST/TRM record      */
char last_client_ts[20];  /* timestamp of the last record processed             */
/* For each client (IP address) maintain a table of HTTP connections
   that are "active", with the following information about each connection:
     id        key identifying the connection, built from the source port,
               destination host and destination port (the source host is
               omitted because the table only ever holds one client)
     activity  1 if a request has been sent and the response is not
               yet complete; 0 otherwise
     state     type of the last event applied to the connection
               (maintained by set_connection)
   A connection is "active" (in the table) from the time a connection
   start (SYN, ACT-REQ or ACT-RSP) is seen in the input until a
   connection end (FIN, RST, TRM) is also seen in the input.
*/
/* Number of table slots in use; valid entries are connections[0..n-1]. */
int active_connections = 0;
/* Capacity of the connection table; set_connection exits the program when
   AddConnection reports that this limit was exceeded. */
#define MAX_CONNECTIONS 1000
struct connect
{
char id[50];
int activity;
enum event_types state;
}connections[MAX_CONNECTIONS];
/* The input line currently being processed, exactly as read by fgets;
   log_REQ and log_RSP copy it to the output unchanged. */
char new_line[500];
/* Forward declarations for the functions defined after main. */

/* Output helpers: each writes one line to outFP. */
void error_line(char *s);
void error_state(char *s);
void log_REQ(void);
void log_RSP(void);
void log_IDLE(char *s);
/* Applies one input event to the connection table. */
void set_connection(char *sp, char *dh, char *dp, enum event_types type);
/* Connection table maintenance (linear scans over `connections`). */
void ClearConnections(void);
int ConnectionsActive(void);
int FindConnection(char *sp, char *dh, char *dp);
int AddConnection(char *sp, char *dh, char *dp);
int RemoveConnection(char *sp, char *dh, char *dp);
/* Milliseconds between two timestamp strings (end - start). */
long elapsed_ms(char *end, char *start);
void main (int argc, char* argv[])
{
int i;
char input_name[256] = "";
char output_name[256] = "";
long idle_limit = 2000; /* default threshold for idleness in millisec. */
long elapsed;
char parse_line[500];
char discard[50];
char *cursor;
char *vp;
/* Parse the command line */
i = 1;
while (i < argc) {
if (strcmp (argv[i], "-r") == 0) {
if (++i >= argc) Usage (argv[0]);
strcpy (input_name, argv[i]);
}
else if (strcmp (argv[i], "-w") == 0) {
if (++i >= argc) Usage (argv[0]);
strcpy (output_name, argv[i]);
}
else if (strcmp (argv[i], "-I") == 0) {
if (++i >= argc) Usage (argv[0]);
idle_limit = (long)atoi(argv[i]);
}
else
Usage (argv[0]);
i++;
}
/* Open files */
if (strcmp(output_name, "") == 0)
outFP = stdout;
else
{
strcat(output_name, ".activity");
if ((outFP = fopen (output_name, "w")) == NULL) {
fprintf (stderr, "error opening %s\n", output_name);
exit (-1);
}
}
if (strcmp(input_name, "") == 0)
dumpFP = stdin;
else
{
if ((dumpFP = fopen (input_name, "r")) == NULL) {
fprintf (stderr, "error opening %s\n", input_name);
exit (-1);
}
}
/* Read each record in the input file. Look for a change in the
source IP address (which indicates a new client). If a new
client, log the end of an idle period (if any) for the old
client and initialize the connection table for the new client.
If a record for the current client has been read, classify the
type of event it represent and process it to update the client
and connection state.
*/
while (!feof (dumpFP)) {
/* Get and parse line of data */
if (fgets (new_line, sizeof(new_line), dumpFP) == NULL)
break;
/* get first line pieces */
sscanf (new_line, "%s %s %s %s %s %s %s",
&ts, &sh, &sp, >, &dh, &dp, &fl);
/* if an ERR line, just show it */
if (strcmp(fl, "ERR:") == 0)
{
error_line(new_line);
continue;
}
/* now get variable part starting with the ":" considering that */
/* interpretation of the remaining fields depends on the flag value */
/* This is necessary to find the ending timestamp for FIN, RST, and
TRM events.
*/
strcpy(parse_line, new_line);
cursor = parse_line;
vp = (char *)strsep(&cursor, ":" );
if ((cursor == (char *)NULL) ||
(vp == (char *)NULL))
{
error_line(new_line);
continue;
}
/* Classify the event type by looking at the flag field from input
records */
if ((strcmp(fl, "REQ") == 0) ||
(strcmp(fl, "REQ-") == 0))
event_type = REQ;
else
{
if ((strcmp(fl, "RSP") == 0) ||
(strcmp(fl, "RSP-") == 0))
event_type = RSP;
else
{
if ((strcmp(fl, "FIN") == 0) ||
(strcmp(fl, "TRM") == 0) ||
(strcmp(fl, "RST") == 0))
{
/* need the ending timestamp from these record types */
sscanf(cursor, "%s %s", &discard, &earliest_end);
event_type = END;
}
else
{
if (strcmp(fl, "SYN") == 0)
event_type = SYN;
else
{
if (strcmp(fl, "ACT-REQ") == 0)
event_type = ACT_REQ;
else
if (strcmp(fl, "ACT-RSP") == 0)
event_type = ACT_RSP;
}
}
}
}
/* now use data from new trace record to update status */
/* first check to see if this is the same client host */
if (strcmp(current_src, sh) != 0)
{
if (client_state == IDLE)
log_IDLE(last_client_ts);
ClearConnections();
client_state = PENDING_ACTIVE;
strcpy(current_src, sh);
}
/* update the connection status for this client's connection */
set_connection(sp, dh, dp, event_type);
/* The main processing for idle periods is done by maintaining a state
variable (client_status) for the client and looking for specific input
record types at different values of the state variable. The
values of client_state and their implications are:
PENDING_ACTIVE - A new client is started and remains PENDING_ACTIVE
until an activity indication such as ACT-REQ,
ACT-RSP, or REQ is seen in which case it enters
the ACTIVE state. If there is an initial response,
PENDING_IDLE is entered.
ACTIVE - At least one request is outstanding and the state
can only change if there is a response completion
or connection termination.
PENDING_IDLE - There are no requests outstanding but the idle
period threshold has not elapsed since it entered
the PENDING_IDLE state.
IDLE - No outstanding requests for a period greater than
the idle threshold. The IDLE (and PENDING_IDLE)
states are exited on activity indication such as
ACT-REQ, ACT-RSP, or REQ
*/
switch (client_state)
{
case PENDING_ACTIVE:
switch (event_type)
{
case SYN:
break;
case ACT_REQ:
case ACT_RSP:
client_state = ACTIVE;
break;
case REQ:
client_state = ACTIVE;
log_REQ();
break;
case RSP:
client_state = PENDING_IDLE;
strcpy(idle_begin, ts);
log_RSP();
break;
case END:
break;
}
break;
case ACTIVE:
switch (event_type)
{
case SYN:
case ACT_REQ:
case ACT_RSP:
break;
case REQ:
log_REQ();
break;
case RSP:
log_RSP();
if (ConnectionsActive() == 0) /* Any active connections?*/
{
client_state = PENDING_IDLE;
strcpy(idle_begin, ts);
}
break;
case END:
if (ConnectionsActive() == 0) /* Any active connections?*/
{
client_state = PENDING_IDLE;
strcpy(idle_begin, earliest_end);
}
break;
}
break;
case PENDING_IDLE:
/* must start checking time, if > n seconds elapse since
entering PENDING_IDLE state, enter IDLE state */
elapsed = elapsed_ms(ts, idle_begin);
if (elapsed < idle_limit)
{
switch (event_type)
{
case SYN:
case END:
break;
case ACT_REQ:
case ACT_RSP:
client_state = ACTIVE;
break;
case REQ:
client_state = ACTIVE;
log_REQ();
break;
case RSP:
log_RSP();
break;
}
break; /* ends case PENDING_IDLE */
}
else /* it has crossed the idle threshold */
client_state = IDLE;
/* NOTE: drop through to IDLE to handle the current event */
case IDLE:
switch (event_type)
{
case SYN:
case END:
break;
case ACT_REQ:
case ACT_RSP:
client_state = ACTIVE;
log_IDLE(ts);
break;
case REQ:
client_state = ACTIVE;
log_IDLE(ts);
log_REQ();
break;
case RSP:
log_RSP();
break;
break; /* ends case PENDING_IDLE */
}
break;
default:
break;
} /* end switch */
strcpy(last_client_ts, ts);
} /* end while (!feof ....) */
close (dumpFP);
close (outFP);
}
/* updates the status of connections for each interesting event */
void set_connection(char *sp, char *dh, char *dp, enum event_types type)
{
int cx;
/* A connection is identified by the host/port 3-tuple (the source IP
address is not needed because only one client is handled at a time).
The connection's status depends on the type of the event that caused
the update. The following event type are defined with their effect
on the connection's status:
SYN, ACT-REQ, - The connection has begun (is an "active"
ACT-RSP connection. Add it to the table as idle
(activity == 0) if a SYN or with request/
response activity (activity == 1) for ACT-REQ
or ACT-RSP.
REQ - Find the connection in the table and mark it with
an outstanding request (activity == 1).
RSP - Find the connection in the table and mark it with
a completed request (activity == 0).
END - The connection has ended (is no longer an "active"
connection). Remove it from the table.
*/
switch (type)
{
case SYN:
case ACT_REQ:
case ACT_RSP:
{
cx = AddConnection(sp, dh, dp);
if (cx < 0) /* already there */
{
error_state("Add for existing connection");
return;
}
if (cx > MAX_CONNECTIONS) /* table overflow */
{
error_state("Active connections exceeds maximum");
exit (-1);
}
connections[cx].state = type;
if (type == SYN)
connections[cx].activity = 0;
else
connections[cx].activity = 1;
break;
}
case REQ:
{
cx = FindConnection(sp, dh, dp);
if (cx < 0) /* not there */
{
error_state("REQ for non-existent connection");
return;
}
if ((connections[cx].state == RSP) ||
(connections[cx].state == ACT_REQ) ||
(connections[cx].state == SYN))
{
connections[cx].activity = 1;
connections[cx].state = REQ;
}
else
error_state("REQ in invalid connection state");
break;
}
case RSP:
{
cx = FindConnection(sp, dh, dp);
if (cx < 0) /* not there */
{
error_state("RSP for non-existent connection");
return;
}
if ((connections[cx].state == REQ) ||
(connections[cx].state == ACT_RSP) ||
(connections[cx].state == SYN))
{
connections[cx].activity = 0;
connections[cx].state = RSP;
}
else
error_state("RSP in invalid connection state");
break;
}
case END:
{
cx = FindConnection(sp, dh, dp);
if (cx < 0) /* not there */
{
error_state("End for non-existent connection");
return;
}
connections[cx].activity = 0;
connections[cx].state = END;
cx = RemoveConnection(sp, dh, dp);
break;
}
default:
break;
}
}
/* A set of functions to maintain the table of "active" connections for the
current client (IP address). All of these use simple linear scans of the
table because we expect the number of concurrently active connections
from a client to be small (< 100) */
/* Clears the active connections from the connection table and
resets the count of active connections to zero */
void ClearConnections(void)
{
int i;
for (i = 0; i < active_connections; i++)
{
strcpy(connections[i].id, "");
connections[i].activity = 0;
connections[i].state = END;
}
active_connections = 0;
}
/* Count the number of active connections that have an outstanding
request (activity == 1) */
int ConnectionsActive(void)
{
int count = 0;
int i;
for (i = 0; i < active_connections; i++)
count = count + connections[i].activity;
return (count);
}
/* Find a connection in the table by its identifying host/port 3-tuple
and return its index (or -1 if not found). Note that the source IP
address is not necessary since the table is used for one client
at a time.
*/
int FindConnection(char *sp, char *dh, char *dp)
{
char connection[50];
int i;
strcpy(connection, sp);
strcat(connection, dh);
strcat(connection, dp);
/* find the connection in the table */
for (i = 0; i < active_connections; i++)
{
if (strcmp(connections[i].id, connection) == 0)
break;
}
if (i == active_connections) /* not there */
return(-1);
else
return (i);
}
/* Add a new connection to the table identified by its host/port
3-tuple (source IP is not needed because the table is for one
client (IP address) only). Return the index of the added
connection or -1 if it is already in the table. Increase the
count of active connections by 1 if added. Initialize the
state of the added connection to the reset state.
*/
int AddConnection(char *sp, char *dh, char *dp)
{
char connection[50];
int i;
strcpy(connection, sp);
strcat(connection, dh);
strcat(connection, dp);
/* check to see if connection already in the table; if not there, add it */
for (i = 0; i < active_connections; i++)
{
if (strcmp(connections[i].id, connection) == 0)
break;
}
if (i < active_connections)
return(-1); /* already there */
else
{
active_connections += 1;
if (active_connections > MAX_CONNECTIONS)
return (active_connections); /* table overflow */
strcpy(connections[i].id, connection);
connections[i].activity = 0;
connections[i].state = END;
return (i);
}
}
/* Remove a connection from the table and compact the table by shifting
all connections above the "hole" down by one index value. Return 0
if all is OK or -1 if the connection was not there. Decrease the
count of active connections by one if one was removed. Reset the
vacated table entry to the reset state
*/
int RemoveConnection(char *sp, char *dh, char *dp)
{
char connection[50];
int i, j;
strcpy(connection, sp);
strcat(connection, dh);
strcat(connection, dp);
/* find the connection in the table; if not there, error */
for (i = 0; i < active_connections; i++)
{
if (strcmp(connections[i].id, connection) == 0)
break;
}
if (i == active_connections)
return (-1);
/* move all active connections above this down by one with overwriting */
for (j = i + 1; j < active_connections; j++)
{
strcpy(connections[j-1].id, connections[j].id);
connections[j-1].activity = connections[j].activity;
connections[j-1].state = connections[j].state;
}
/* clearing the top vacated slot is not strictly necessary but it may
help with debugging */
strcpy(connections[active_connections - 1].id, "");
connections[active_connections - 1].activity = 0;
connections[active_connections - 1].state = END;
active_connections -= 1;
return (0);
}
/* Log a client request: write the current input line (new_line) to the
   output file unchanged. */
void log_REQ(void)
{
    fputs(new_line, outFP);
}
/* Log a server response: write the current input line (new_line) to the
   output file unchanged. */
void log_RSP(void)
{
    fputs(new_line, outFP);
}
/* Write one IDLE line for the current client to the output file.
   `ts` is the timestamp at which the idle period ended; the period began
   at the global idle_begin.  The line carries the end timestamp, the
   client address, placeholder "*" fields where a request/response line has
   port and host fields, the idle time in milliseconds and the start
   timestamp.  The elapsed time is printed as a long so that very long idle
   periods are not truncated by a conversion to int. */
void log_IDLE(char *ts)
{
    long elapsed;

    elapsed = elapsed_ms(ts, idle_begin);
    fprintf(outFP, "%s %-15s %5s > %-15s %5s IDLE%12ld %s\n",
            ts, current_src, "*", "*", "*", elapsed, idle_begin);
}
/* Echo an input line that could not be processed (an ERR record or a
   malformed line) to the output file exactly as given in `s`. */
void error_line(char * s)
{
    fputs(s, outFP);
}
/* Write an ERROR line to the output file for the current record: the
   record timestamp, the current client address, "*" placeholders for the
   remaining address fields, and the message `s`. */
void error_state(char * s)
{
    static const char placeholder[] = "*";

    fprintf(outFP, "%s %-15s %5s > %-15s %5s ERROR %s\n",
            ts, current_src, placeholder, placeholder, placeholder, s);
}
/*--------------------------------------------------------------*/
/* subtract two timevals (t1 - t0) with result in tdiff         */
/* tdiff, t1 and t0 are all pointers to struct timeval          */
/* A negative microsecond difference borrows one second.        */
/*--------------------------------------------------------------*/
static void
tvsub(struct timeval *tdiff, const struct timeval *t1,
      const struct timeval *t0)
{
    tdiff->tv_sec = t1->tv_sec - t0->tv_sec;
    tdiff->tv_usec = t1->tv_usec - t0->tv_usec;
    if (tdiff->tv_usec < 0)
    {
        tdiff->tv_sec--;
        tdiff->tv_usec += 1000000;
    }
}

/*--------------------------------------------------------------*/
/* convert a "seconds.microseconds" timestamp string to a       */
/* struct timeval; a missing fractional part counts as zero.    */
/* The digits after the '.' are taken as a microsecond count,   */
/* i.e. a six-digit fraction is assumed.                        */
/*--------------------------------------------------------------*/
static void
parse_timestamp(const char *text, struct timeval *tv)
{
    const char *dot = strchr(text, '.');

    /* strtol stops at the '.', so no scratch copy of the text is needed */
    tv->tv_sec = strtol(text, NULL, 10);
    tv->tv_usec = (dot != NULL) ? strtol(dot + 1, NULL, 10) : 0;
}

/*--------------------------------------------------------------*/
/* compute the elapsed time in milliseconds to end_time         */
/* from some past time given by start_time (both formatted      */
/* timevals, "seconds.microseconds")                            */
/* Returns (end - start) in whole milliseconds; any remaining   */
/* fraction of a millisecond is dropped.                        */
/*--------------------------------------------------------------*/
long elapsed_ms(char *end, char *start)
{
    struct timeval delta, end_time, start_time;

    parse_timestamp(end, &end_time);
    parse_timestamp(start, &start_time);
    tvsub(&delta, &end_time, &start_time);
    /* express as milliseconds */
    return ((long)delta.tv_sec * 1000) + ((long)delta.tv_usec / 1000);
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */