#! /bin/sh
#!/usr/bin/tclsh
#
# Copyright (C) 2001 by USC/ISI
# All rights reserved.
#
# Redistribution and use in source and binary forms are permitted
# provided that the above copyright notice and this paragraph are
# duplicated in all such forms and that any documentation, advertising
# materials, and other materials related to such distribution and use
# acknowledge that the software was developed by the University of
# Southern California, Information Sciences Institute.  The name of the
# University may not be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# ModelGen is a set of scripts that take tcpdump trace as input and
# output a set of CDF files that model Web traffic. It also outputs
# a time series of traffic size (in 1ms block) for further wavelet
# scaling analysis
#
# usage:
#   ./ModelGen <tcpdump trace file> <threshold> <network prefix>
#
#   <tcpdump trace file> : tcpdump trace file generated using
#                          tcpdump -w option
#   <threshold>          : the threshold time value (in millisecond)
#                          that distinguishes idle periods in order
#                          to infer user "think" times between
#                          requests for new top-level pages.
#   <network prefix>     : network prefix used to distinguish
#                          inbound vs. outbound traffic
#
# example: ./ModelGen tracefile 1000 128.9
#
# Note that currently it's only tested on Linux system
#
# This work is supported by DARPA through SAMAN Project
# (http://www.isi.edu/saman/), administered by the Space and Naval
# Warfare System Center San Diego under Contract No. N66001-00-C-8066
#
#
# Implementation notes:
#   * tcpdump is executed as ./tcpdump (from the current directory); every
#     other helper program is invoked by bare name.
#   * All output files are named "<trace>.<suffix>", except the scratch files
#     inbound.seq / outbound.seq (and their .sorted variants), which live in
#     the current directory and are reused by the WWW and FTP passes.
#   * Lines starting with '#' followed directly by a command are steps that
#     were disabled in the original script; they are kept verbatim and still
#     refer to the positional parameters ($1 = trace, $3 = prefix).
#

# Refuse to run with missing arguments: every step below depends on the
# trace name, and the helpers take the threshold/prefix as option values.
if [ "$#" -lt 3 ]; then
  printf 'usage: %s <tcpdump trace file> <threshold> <network prefix>\n' "$0" >&2
  exit 2
fi

trace=$1      # tcpdump capture file; also the prefix of every output file
threshold=$2  # idle threshold handed to http_active -I
prefix=$3     # network prefix handed to the io.*/BW-seq.pl helpers via -s

#######################################
# Run the HTTP connection/activity analysis for one traffic direction.
# Globals:   trace, threshold (read); http_short, http_long (written;
#            POSIX sh has no function-local variables)
# Arguments: $1 - short direction tag used in output names ("in" / "out")
#            $2 - long direction tag ("inbound" / "outbound"); selects the
#                 <trace>.http-srv.<long> input and is passed to outputCDF -e
#######################################
analyze_http() {
  http_short=$1
  http_long=$2

  echo "run http_connect"
  # -k 2,2 -k 4,4 -k 1,1 is the standard spelling of the obsolete
  # "+1 -2 +3 -4 +0 -1" key list (fields 2, 4, then 1).
  sort -s -o "${trace}.${http_short}.http-srv-sort" \
    -k 2,2 -k 4,4 -k 1,1 -T /tmp "${trace}.http-srv.${http_long}"
  http_connect -r "${trace}.${http_short}.http-srv-sort" \
    -w "${trace}.${http_short}.http-srv.connect"
  grep "ACT" "${trace}.${http_short}.http-srv.connect" \
    > "${trace}.${http_short}.http-srv.connect.time"
  sort "${trace}.${http_short}.http-srv.connect.time" \
    > "${trace}.${http_short}.http-srv.connect.time.sort"

  date
  echo "run http_active"
  # Obsolete "+1 -2 +0 -1" == fields 2, then 1.
  sort -s -o "${trace}.${http_short}.http-srv.connect.sort" \
    -k 2,2 -k 1,1 -T /tmp "${trace}.${http_short}.http-srv.connect"
  http_active -r "${trace}.${http_short}.http-srv.connect.sort" \
    -w "${trace}.${http_short}.http-srv.active" -I "$threshold"

  date
  echo "compute CDF statistics"
  cat "${trace}.${http_short}.http-srv.active.activity" \
    | outputCDF -e "$http_long"

  date
  echo "compute time series (1ms block)"
  bw.tcl "${trace}.http-srv.${http_long}"
  cat "${trace}.http-srv.${http_long}.bw" \
    | time-series.pl > "${trace}.${http_short}.time-series"
}

#######################################
# Estimate bandwidth and connection-setup delay for one protocol.
# Globals:   trace, prefix (read); bw_* (written)
# Arguments: $1 - protocol suffix of the input/output files ("www" / "ftp")
#            $2 - port passed to BW-seq.pl -p
#            $3 - port passed to delay.pl -p
#            $4 - section heading to print
#            $5 - progress message printed before BW-seq.pl runs
# Notes:     inbound.seq / outbound.seq are presumably written by BW-seq.pl
#            into the current directory (TODO confirm); <trace>.outbound.pair
#            and <trace>.sync* are overwritten on every call.
#######################################
estimate_bw_delay() {
  bw_proto=$1
  bw_data_port=$2
  bw_syn_port=$3
  bw_heading=$4
  bw_seq_msg=$5

  echo "$bw_heading"
  date
  echo "$bw_seq_msg"
  cat "${trace}.${bw_proto}" | BW-seq.pl -s "$prefix" -p "$bw_data_port"
  sort inbound.seq -o inbound.seq.sorted
  sort outbound.seq -o outbound.seq.sorted

  date
  echo "search for DATA/ACK packets which have the same seqence number for outboun d traffic"
  cat outbound.seq.sorted | BW-pair.pl > "${trace}.outbound.pair"

  date
  echo "estimate the bandwidth for inbound/outbound traffic"
  cat "${trace}.outbound.pair" | BW.out.pl -w "${trace}.${bw_proto}"
  cat inbound.seq.sorted | BW.in.pl -w "${trace}.${bw_proto}"
  dat2cdf -e 0 -i 0.001 -d 1 -t "${trace}.${bw_proto}.outbound.BW"
  dat2cdf -e 0 -i 0.001 -d 1 -t "${trace}.${bw_proto}.inbound.BW"
  dat2cdf -e 0 -i 0.001 -d 1 -t "${trace}.${bw_proto}.outbound.delay"

  date
  echo "Locate SYN connection"
  cat "${trace}.${bw_proto}" | delay.pl -p "$bw_syn_port" > "${trace}.sync"
  sort -s -o "${trace}.sync.sorted" -T /tmp "${trace}.sync"

  date
  echo "compute delay for each SYN connection pair between servers and clients"
  pair.tcl "${trace}.sync.sorted" > "${trace}.sync.delay"
  sort -s -o "${trace}.sync.delay.sorted" -T /tmp "${trace}.sync.delay"
  awk -f delay.awk < "${trace}.sync.delay.sorted" \
    > "${trace}.${bw_proto}.inbound.delay"
  dat2cdf -e 0 -i 0.001 -d 1 -t "${trace}.${bw_proto}.inbound.delay"
}

#######################################
# Build TCP window-size CDFs for one protocol from its SYN lines.
# Globals:   trace (read); win_* (written)
# Arguments: $1 - protocol suffix ("www" / "ftp")
#            $2 - server port; lines matching ".<port> >" go to the *.svr.win
#                 files and lines matching ".<port>:" to the *.clnt.win files
#                 (the leading "." is an unescaped regex dot, as in the
#                 original patterns)
#######################################
window_cdfs() {
  win_proto=$1
  win_port=$2
  win_out="${trace}.${win_proto}.outbound"
  win_in="${trace}.${win_proto}.inbound"

  grep " S " "$win_out" | grep ".${win_port} >" > "${win_out}.svr.win"
  grep " S " "$win_in" | grep ".${win_port} >" > "${win_in}.svr.win"
  grep " S " "$win_out" | grep ".${win_port}:" > "${win_out}.clnt.win"
  grep " S " "$win_in" | grep ".${win_port}:" > "${win_in}.clnt.win"

  awk -f win.awk < "${win_out}.svr.win" > "${win_out}.wins"
  awk -f win.awk < "${win_in}.svr.win" > "${win_in}.wins"
  awk -f win.awk < "${win_out}.clnt.win" > "${win_out}.winc"
  awk -f win.awk < "${win_in}.clnt.win" > "${win_in}.winc"

  dat2cdf -e 1024 -i 1024 -d 1024 -t "${win_out}.wins"
  dat2cdf -e 1024 -i 1024 -d 1024 -t "${win_out}.winc"
  dat2cdf -e 1024 -i 1024 -d 1024 -t "${win_in}.wins"
  dat2cdf -e 1024 -i 1024 -d 1024 -t "${win_in}.winc"
}

##############################
# Stage 1: split the raw capture into per-protocol text dumps.
date
echo "*** parsing tcpdump file ***"
echo "*** parsing .all ***"
./tcpdump -n -tt -q -r "$trace" > "${trace}.all"

date
echo "*** parsing .www ***"
./tcpdump -n -tt -r "$trace" tcp port 80 > "${trace}.www"
#./tcpdump -n -tt -r $1 port ftp or port ftp-data > $1.ftp

date
echo "*** parsing .ftp ***"
./tcpdump -n -tt -r "$trace" port ftp > "${trace}.ftp1"
cat "${trace}.ftp1" | getFTPclient.pl > "${trace}.ftp2"
cat "${trace}.all" | getFTP.pl -r "${trace}.ftp2" > "${trace}.ftp"
#./tcpdump -q -n -tt -r $1 port ftp or port ftp-data > $1.ftps
#./tcpdump -n -tt -r $1 port domain > $1.dns

date
echo "*** parsing .http-srv ***"
./tcpdump -n -tt -r "$trace" tcp src port 80 > "${trace}.http-srv"

date
echo "*** analyze traffic mix ***"
#cat $1.all | io.pl -s $3 -w $1.all
#cat $1.all.inbound | traffic-classify > $1.traffic.cnt.inbound
#cat $1.all.outbound | traffic-classify > $1.traffic.cnt.outbound

date
echo "*** analyze flow statistics ***"
#awk -f flow.awk < $1.all.outbound > $1.all.outbound.flow
#awk -f flow.awk < $1.all.inbound > $1.all.inbound.flow
#sort -s -o $1.all.outbound.flow.sort -T /tmp $1.all.outbound.flow
#sort -s -o $1.all.inbound.flow.sort -T /tmp $1.all.inbound.flow
#cat $1.all.outbound.flow.sort | flow.pl -w $1.outbound.flow
#cat $1.all.inbound.flow.sort | flow.pl -w $1.inbound.flow
#sort -s -o $1.inbound.flow.start.sort $1.inbound.flow.start
#sort -s -o $1.outbound.flow.start.sort $1.outbound.flow.start
#awk -f arrive2inter.awk < $1.outbound.flow.start.sort > $1.outbound.flow.arrival
#awk -f arrive2inter.awk < $1.inbound.flow.start.sort > $1.inbound.flow.arrival
#dat2cdf -e 1024 -i 1024 -d 1024 -t $1.outbound.flow.size
#dat2cdf -e 1024 -i 1024 -d 1024 -t $1.inbound.flow.size
#dat2cdf -e 0 -i 1 -d 1 -t $1.outbound.flow.dur
#dat2cdf -e 0 -i 1 -d 1 -t $1.inbound.flow.dur
#dat2cdf -e 0 -i 0.001 -d 1 -t $1.outbound.flow.arrival
#dat2cdf -e 0 -i 0.001 -d 1 -t $1.inbound.flow.arrival

##############################
# Stage 2: separate inbound from outbound traffic.  Only the http-srv split
# is active; the DNS, WWW and FTP splits are disabled.
date
echo "*** seperate Inbound and Outbound traffic ***"
echo "DNS"
#io.tcl $1.dns

date
echo "WWW"
#cat $1.www | io.www.pl -s $3 -w $1.www
cat "${trace}.http-srv" | io.www.pl -s "$prefix" -w "${trace}.http-srv"

date
echo "FTP"
#cat $1.ftp | io.ftp.pl -s $3 -w $1.ftp
#cat $1.ftps | io.ftp.pl -s $3 -w $1.ftps

##############################
# Stage 3: per-direction HTTP analysis.  Files matching these globs in the
# current directory are removed first.
date
/bin/rm -rf *.time-series
/bin/rm -rf *connect.time*
echo "*** Analyze Inbound traffic ***"
analyze_http in inbound

##############################
date
echo "*** Analyze Outbound traffic ***"
analyze_http out outbound

######################################
# Stage 4: bandwidth and delay estimation, WWW first, then FTP.
echo "*** Delay and Bandwidth estimation ***"
estimate_bw_delay www 80 80 \
  "WWW traffic" "output traffic between web servers and clients"

######################################
estimate_bw_delay ftp 20 21 \
  "FTP traffic" "output traffic between ftp servers and clients"

######################################
# Stage 5: FTP session/file statistics.
# NOTE(review): this stage reads <trace>.ftps.outbound / <trace>.ftps.inbound,
# but no active command in this script creates them; presumably they came from
# the disabled "ftps" tcpdump + io.ftp.pl steps above -- confirm before relying
# on this stage's output.
echo "Output statistics for FTP traffic"
date
for ftp_dir in outbound inbound; do
  grep ".20 >" "${trace}.ftps.${ftp_dir}" | awk -f ftp.awk | sort \
    > "${trace}.ftp.${ftp_dir}.conn"
  cat "${trace}.ftp.${ftp_dir}.conn" | ftp.pl -w "${trace}.ftp.${ftp_dir}"
  sort -o "${trace}.ftp.${ftp_dir}.sess.arrive.sort" \
    "${trace}.ftp.${ftp_dir}.sess.arrive"
done

for ftp_dir in outbound inbound; do
  dat2cdf -e 0 -i 0.001 -d 1 -t "${trace}.ftp.${ftp_dir}.file.inter"
  dat2cdf -e 0 -i 1000 -d 1000 -t "${trace}.ftp.${ftp_dir}.size"
  dat2cdf -e 0 -i 1 -d 1 -t "${trace}.ftp.${ftp_dir}.fileno"
done

for ftp_dir in outbound inbound; do
  awk -f arrive2inter.awk < "${trace}.ftp.${ftp_dir}.sess.arrive.sort" \
    > "${trace}.ftp.${ftp_dir}.sess.inter"
  dat2cdf -e 0 -i 0.01 -d 1 -t "${trace}.ftp.${ftp_dir}.sess.inter"
done

######################################
# Stage 6: TCP window-size CDFs.
# NOTE(review): reads <trace>.www.{outbound,inbound} and
# <trace>.ftp.{outbound,inbound}; the io.www.pl / io.ftp.pl invocations that
# look like their producers are disabled above -- confirm where these files
# are expected to come from.
echo "Output TCP window size"
date
echo "WWW"
window_cdfs www 80

echo "FTP"
window_cdfs ftp 20

date
echo "execution complete"