#!/usr/bin/perl -w
#
# NetFlow V5 client that stores flows into a MySQl Database
#
# Original Version
# (C) 2002 - Deri Luca <deri@ntop.org>
#
# Modified Version (C) 2003 by
# sinma GmbH - Uli Staerk <uli@sinma.de>
# Berlin - Germany
#
# Prerequisites:
# - Perl
# - Perl DBI module (www.cpan.org)
# - Perl IO::Socket module (www.cpan.org)
# - Perl POSIX modules (www.cpan.org)
# - Perl NetAddr::IP module (www.cpan.org)
#
# Installation:
# - configure your NetFlow probes (ntop,nProbe,Cisco,...) to match the ip of the
# machine this script will be running on. The default port it listens for flows
# is UDP 2055
# - copy this script to any desired location in your directory tree
# - make this script executable (chmod)
# - check if the path to your perl executable in the first line of this script is correct
# - create the 'flows' table if you need it (read on)
#
# Usage:
# ./netFlowClient.pl
#
# This modified version does not only store the raw flows into a single
# table which will easily grow up to 2 gigs a day.
#
# It creates a table for every month (machine time, not packet time - have to fix that)
# and in that table creates an entry for each host specified in a network list - every day.
# This makes it very easy to get traffic information for a specific host during a specific period
#
# When a flow is coming in it gets checked weather it is coming or going to a host in
# the localnets list and the corresponding field for that host gets updated.
#
# The script produces almost no load at all on a dual P3 1000 with 2 G RAM and SCSI
# However the mysqld daemon might raise your load up to 0.04 on the same machine with
# 4096 hosts being monitored. Please read the mysql manual for performance tuning.
# Keep in mind that this might become a real problem when you have a lot of networks in your
# localnets list.
#
# According to my calculations each monthly table gets about 8 MB big when monitoring 4096 hosts
#
# TODO:
# - support other nets then /24
# - support other DBMS then MySQL
# - write local traffic to another table
# - find a way to have traffic-data for every 5 minutes in a reasonable amount of time and disk space
#
# Table layout for the 'flows' table:
#
# CREATE TABLE flows (
# ipSrc varchar(19) NOT NULL default '',
# ipDst varchar(19) NOT NULL default '',
# pktSent int(11) NOT NULL default '0',
# bytesSent mediumint(9) NOT NULL default '0',
# startTime time NOT NULL default '00:00:00',
# endTime time NOT NULL default '00:00:00',
# srcPort smallint(6) NOT NULL default '0',
# dstPort smallint(6) NOT NULL default '0',
# tcpFlags tinyint(4) NOT NULL default '0',
# proto tinyint(4) NOT NULL default '0',
# tos tinyint(4) NOT NULL default '0'
# ) TYPE=MyISAM;
# This application has been strongly inspired by:
# ftp://ftp.mcs.anl.gov/pub/systems/software/netflowdb-0.1.0.tar.gz
#
use strict;
use DBI;
use IO::Socket;
use POSIX qw(strftime);
use NetAddr::IP;
my $debug = 0; # prints A LOT OF debug messages (MySQL queries)
my $debug2 = 0;
my $rawFlows = 0; # shall we dump the raw flows into the 'flows' table (don't forget to create it first!)? 1 = true, 0 = false
my $ignoreLocalTraffic = 1; # shall we ignore traffic the host in our localnets have among each other? 1 = true, 0 = false
# array containing our local nets we want to get stored
# notice: only /24 nets (256 hosts) are supported at the moment.
# see the examples below for syntax.
my @ip = ('212.21.77.0',
'212.21.78.0',
'212.21.79.0',
'212.21.85.0',
'212.21.86.0',
'212.21.90.0',
'212.21.92.0',
'212.21.93.0',
'212.21.94.0',
'212.21.95.0',
'212.42.228.0',
'212.42.229.0',
'212.42.230.0',
'212.42.231.0',
'212.42.235.0',
'213.83.17.0');
# set your database parameters according to your needs
my $databaseName = "DBI:mysql:traffic"; # Database name
my $databaseUser = "traffic"; # Database user name
my $databasePw = "traffic"; # Database user password
###################
#
# No need to edit anything below here.
#
###################
my @nets;
foreach(@ip) {
my $temp=$_;
my $tmp = new NetAddr::IP $temp;
my $num = $tmp->numeric();
push @nets,$num;
}
my $numHosts = scalar(@nets)*256;
my $numFlow = 0;
my $udpPort = 2055; # Default port
# Open a socket to receive Cisco netflow output packets
my $flows = IO::Socket::INET->new(Proto => "udp",
LocalAddr => "0.0.0.0",
LocalPort => "($udpPort)")
|| die "cannot open UDP socket listening @ port $udpPort";
my $dbh = DBI->connect($databaseName, $databaseUser, $databasePw) || die "Connect failed: $DBI::errstr\n";
print "Waiting for NetFlow V5 flows on port $udpPort\n";
my $first_time = strftime "%Y%m%d", localtime;
my $first_month = int($first_time/100);
my $now_time;
my $now_month;
my $run = 0; # are we running yet? if not check if we should create any tables / data sets
while(1) {
my ($buf,$rcvval);
$rcvval=$flows->recv($buf,2000,0) || die "recv: $!";
if($debug) {
printf "Received %d byte packet\n",length($buf);
}
# Flow header
my @recvdFlow=unpack("a24 a48 a48 a48 a48 a48 a48 a48 a48 a48 a48 a48".
" a48 a48 a48 a48 a48 a48 a48 a48 a48 a48 a48".
" a48 a48 a48 a48 a48 a48 a48 a48",$buf);
my ($hver,$hcount,$hsysuptime,$hunix_secs,$hunix_nsecs,$hflow_sequence)= unpack("n n N N N N x4",shift(@recvdFlow));
next if ($hver != 5); # We handle NetFlow 5 only
next if ($hcount != ((length($buf)-24)/48)); # This packet doesn't seem to be ok
my $basetime=$hunix_secs+($hunix_nsecs/1000000000)-($hsysuptime/1000);
my $aFlow=shift(@recvdFlow);
$now_time = strftime "%Y%m%d", localtime;
$now_month = int($now_time/100);
if($now_time!=$first_time || !$run) { # check if date (year,month,day) changed
if($now_month!=$first_month || !$run) { # if date changed check if month changed
initializetable(1); # create a new monthly table
$first_month=$now_month;
}
initializetable(2); # create table entries for today
$first_time=$now_time;
$run = 1; # from no on we only check for the tables when the date changed
}
while(defined($aFlow)) # This loop ends
{
if(length($aFlow) < 48) { # This flow is too small
last;
} else {
my ($fsrc,$fdst,$fnext,$fin,$fout,$fpkts,$focts,$fstrt,$fend,$fsrcp,
$fdstp,$ftcp,$fprot,$ftos,$fsas,$fdas,$fsmsk,$fdmsk)
= unpack("N3 n2 N4 n2 x C3 n2 C2 x2", $aFlow);
if($rawFlows) {
my $sql = "INSERT INTO flows (ipSrc, ipDst, pktSent, bytesSent, startTime, endTime, srcPort, ".
"dstPort, tcpFlags, proto, tos) VALUES ('".num2ip($fsrc)."', '".num2ip($fdst)."', ".
"'$fpkts', '$focts', '".timestr($basetime+($fstrt/1000))."', '".timestr($basetime+($fend/1000)).
"', '$fsrcp', '$fdstp', '$ftcp', '$fprot', '$ftos')";
if($debug) { print $sql."\n"; }
$dbh->do($sql);
}
foreach(@nets) { # check the incoming flow if one of the ip fields belongs to our local nets
my $temp=$_;
if (($fsrc&0xffffff00)==$temp) { # source ip is in localnets, update bytesSent field
if($ignoreLocalTraffic) { # we don't count traffic the hosts in our network list have among each other
my $inLocal = 0;
foreach(@nets) {
my $temp2=$_;
if (($fdst&0xffffff00)==$temp2) {
$inLocal = 1; # destination ip is in our localnets list
if($debug2) { print "Lokaler Traffic: Von: ".num2ip($fsrc)." Nach: ".num2ip($fdst)."\n"; }
}
}
if(!$inLocal) { # do the query if destionation ip is not in our localnets list
my $sql2="UPDATE traffic_$now_month SET bytesSent=bytesSent+$focts WHERE ip='".num2ip($fsrc)."' AND date='$now_time';";
$dbh->do($sql2);
if($debug) { print $sql2."\n"; }
}
} else {
my $sql2="UPDATE traffic_$now_month SET bytesSent=bytesSent+$focts WHERE ip='".num2ip($fsrc)."' AND date='$now_time';";
$dbh->do($sql2);
if($debug) { print $sql2."\n"; }
}
}
if (($fdst&0xffffff00)==$temp) { # destination ip is in localnets, update bytesRcvd field
if($ignoreLocalTraffic) { # we don't count traffic the hosts in our network list have among each other
my $inLocal = 0;
foreach(@nets) {
my $temp2=$_;
if (($fsrc&0xffffff00)==$temp2) {
$inLocal = 1; # source ip is in our localnets list
if($debug2) { print "Lokaler Traffic: Von: ".num2ip($fsrc)." Nach: ".num2ip($fdst)."\n"; }
}
}
if(!$inLocal) { # do the query if source ip is not in our localnets list
my $sql3="UPDATE traffic_$now_month SET bytesRcvd=bytesRcvd+$focts WHERE ip='".num2ip($fdst)."' AND date='$now_time';";
$dbh->do($sql3);
if($debug) { print $sql3."\n"; }
}
} else {
my $sql3="UPDATE traffic_$now_month SET bytesRcvd=bytesRcvd+$focts WHERE ip='".num2ip($fdst)."' AND date='$now_time';";
$dbh->do($sql3);
if($debug) { print $sql3."\n"; }
}
}
}
$numFlow++;
}
$aFlow=shift(@recvdFlow);
}
} # while
$dbh->disconnect();
#########################
sub timestr {
my $t=shift(@_);
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime($t);
$mon++; # localtime() returns 0=Jan, 1=Feb, etc
$year+=1900; # $year is the number of years since 1900,
# that is, $year is 123 in year 2023.
return(sprintf("%4.4d%2.2d%2.2d%2.2d%2.2d%2.2d",$year,$mon,$mday,$hour,$min,$sec));
}
#########################
sub num2ip {
my $a = shift(@_);
return(sprintf("%d.%d.%d.%d",
($a&0xff000000)>>24, ($a&0x00ff0000)>>16,
($a&0x0000ff00)>>8, ($a&0x000000ff)));
}
#########################
sub initializetable {
my $mode = shift(@_);
if($mode==1) { # create monthly table. mysql will check for us that the table doesn't exist yet. ugly.
my $query = "CREATE TABLE IF NOT EXISTS traffic_$now_month (
ip varchar(15) NOT NULL default '',
bytesSent bigint(20) NOT NULL default '0',
bytesRcvd bigint(20) NOT NULL default '0',
date int(8) NOT NULL default '0',
PRIMARY KEY date (date,ip)
) TYPE=MyISAM;";
# primary key date is an index over the date and ip fields which should make the update queries for each incoming flow much faster
# because it massively speeds up searching for the dataset specified in the WHERE clause
$dbh->do($query) or die "Failed to create monthly traffic table.";
} elsif($mode==2) { # create daily entries in monthly table
my $query2="SELECT count(DISTINCT ip) AS c FROM traffic_$now_month WHERE date='$now_time'";
my $countHosts = $dbh->selectrow_array($query2);
if($numHosts!=$countHosts) { # only do if the hosts for $now_time don't exist yet. the script might come here when started for the second time on one day
foreach(@nets) { # go through our network list and create an empty data set for each host
my $temp=$_;
for(my $i=$temp; $i<$temp+256; $i++) {
my $query3="INSERT IGNORE INTO traffic_$now_month VALUES('".num2ip($i)."',0,0,$now_time);";
$dbh->do($query3);
}
}
}
}
}
syntax highlighted by Code2HTML, v. 0.9.1