package Lire::WELF;

use strict;
use warnings;

use base qw/ Exporter /;

use Time::Local;

use vars qw/ @EXPORT_OK /;

BEGIN {
    @EXPORT_OK = qw//;
}

########################################################################
#                        PARSER METHODS
########################################################################

# new()
#
# Constructor. Returns a new, empty parser object. May be invoked on the
# class name or on an existing instance (the instance's class is reused).
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;

    return bless {}, $class;
}

# welf_time( $time_str )
#
# Converts a WELF time string ("YYYY-MM-DD hh:mm[:ss[.frac]] [TZ]") into
# epoch seconds.
#
# - Seconds are optional and default to 0; a fractional part is accepted
#   but ignored.
# - A trailing "UTC" makes the conversion use timegm().
# - Any other trailing token is installed as $ENV{TZ} for the duration of
#   the timelocal() call.
# - Without a trailing token the local time zone is used.
#
# Dies with "invalid WELF date string: ..." when the string does not match.
sub welf_time {
    my ($time_str) = @_;

    # Treat a missing argument like an empty string so that the regular
    # "invalid WELF date string" error is raised without an undef warning.
    $time_str = '' unless defined $time_str;

    my ( $year, $month, $mday, $hours, $min, $sec, $tz ) = $time_str =~
      /^(\d\d\d\d)-(\d\d?)-(\d\d?)\s+               # YYYY-MM-DD
        (\d\d?):(\d\d?)(?::(\d\d?)(?:\.\d+)?)?\s*   # hh:mm:ss.ms
        (\S+)?                                      # TZ
        $/x
      or die "invalid WELF date string: $time_str\n";

    # The full four-digit year is handed to Time::Local on purpose: a year
    # reduced by 1900 lands in the 0..99 range for 20th century dates, which
    # Time::Local interprets as a rolling-century shorthand.
    my @when = ( $sec || 0, $min, $hours, $mday, $month - 1, $year );

    if ( defined $tz ) {
        return timegm(@when) if $tz eq 'UTC';

        # FIXME: A little hackish. This assumes that the underlying OS
        # understands that value.
        # Hopefully, it shouldn't happen too much since the WELF standard
        # doesn't allow a timezone extension, but UTC has been seen in the
        # field. (No other timezone string was ever seen, so this may be
        # dead code.)
        local $ENV{'TZ'} = $tz;
        return timelocal(@when);
    }

    return timelocal(@when);
}

# _split_endpoint( $welf, $field )
#
# Private helper for the SonicWall extensions. When $welf->{$field}
# contains a ':' it is split into the address (stored back in $field) plus
# "${field}_port" and/or "${field}_if":
#
#   addr:port:if  -> $field, ${field}_port, ${field}_if
#   addr:port     -> $field, ${field}_port   (second part is all digits)
#   addr:if       -> $field, ${field}_if
#
# Only the first two parts are used when the value has more than three
# parts. Parts that are missing after the split (e.g. "addr:") do not
# create keys.
sub _split_endpoint {
    my ( $welf, $field ) = @_;

    my $value = $welf->{$field};
    return unless defined $value && index( $value, ':' ) != -1;

    my @parts = split /:/, $value;

    if ( @parts == 3 ) {
        @{$welf}{ $field, "${field}_port", "${field}_if" } = @parts;
    }
    elsif ( defined $parts[1] && $parts[1] =~ /^\d+$/ ) {
        # Second part looks like a port number
        @{$welf}{ $field, "${field}_port" } = @parts[ 0, 1 ];
    }
    else {
        $welf->{$field} = $parts[0] if defined $parts[0];
        $welf->{"${field}_if"} = $parts[1] if defined $parts[1];
    }

    return;
}

# parse( $line )
#
# Parses one WELF record and returns a hash reference mapping field names
# to values. Surrounding double quotes are removed from quoted values, the
# time field is converted to epoch seconds through welf_time(), and the
# SonicWall "addr:port:if" extensions of the src and dst fields are split
# into separate src_port/src_if and dst_port/dst_if keys.
#
# Dies when the line has no id= or no time= field, or when the time value
# cannot be parsed.
sub parse {
    my ( $self, $line ) = @_;

    $line = '' unless defined $line;
    $line =~ s/\r$//;    # Strip trailing CR

    die "not a WELF record: missing id= field"
      unless $line =~ /\bid=/;
    die "not a WELF record: missing time= field"
      unless $line =~ /\btime=/;

    my %welf = ();
    while ( $line =~ /\b(\w+)=(?:"(.*?)"|([^" ]+))/g ) {
        my ( $key, $value ) = ( $1, defined $2 ? $2 : $3 );

        # Transform time string into epoch. The resolved value is used so
        # that an unquoted time value is reported in the error message.
        $welf{$key} = $key eq 'time' ? welf_time($value) : $value;
    }

    # SonicWall extensions handling
    _split_endpoint( \%welf, $_ ) for qw/ src dst /;

    return \%welf;
}

1;

__END__

=pod

=head1 NAME

Lire::WELF - Base implementation of a WebTrends(tm) Enhanced Log Format parser

=head1 SYNOPSIS

    use Lire::WELF;

    my $parser = Lire::WELF->new;

    my $welf_rec = $parser->parse( $line );

=head1 DESCRIPTION

This module defines an object able to parse WebTrends(tm) Enhanced Log
Format. That log format was defined by WebTrends(tm) for its Firewall
Suite. It defines an extensible format that is now used by several
packet filters and/or proxy firewalls.

The document describing that format is available from
http://www.webtrends.com/partners/welfOverview.htm

A list of products/vendors that support this format can be found at
http://www.webtrends.com/partners/firewall.htm

You create a WELF parser object using the new() method:

    my $parser = Lire::WELF->new();

To parse a WELF record you invoke the parse() method with the line
containing the record as parameter:

    my $rec = $parser->parse( $line );

The $rec is a hash reference with the WELF field names used as keys and
the values of the record as values.
No values are interpreted or checked in any way except for the following:

=over 4

=item time

The time field will be converted to epoch time.

=item quotation

The quotation marks used when the value contains spaces are removed.

=back

=head2 WELF EXTENSIONS

The parser also supports "extensions" to the format found in the field.

=over

=item port

We will correctly interpret src and dst fields that have the port embedded
in them:

    src=192.168.1.1:1037

=item interface

A possible :IFNAME will also be interpreted as the interface name:

    dst=192.168.100.10:23:WAN

=item timezone

The time field may contain a time zone identifier:

    time="2001-12-02 12:34:12 UTC"

=back

=head2 Derived Fields

The parser will generate a few 'derived' fields to complete the format.

=over 4

=item src_port

Will contain the port portion of the src field when SonicWall
extensions are used.

=item dst_port

Will contain the port portion of the dst field when SonicWall
extensions are used.

=item src_if

Will contain the interface portion of the src field when SonicWall
extensions are used.

=item dst_if

Will contain the interface portion of the dst field when SonicWall
extensions are used.

=back

=head1 AUTHOR

Francis J. Lacoste

=head1 VERSION

$Id: WELF.pm,v 1.11 2006/07/23 13:16:30 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2001 Stichting LogReport Foundation LogReport@LogReport.org

This file is part of Lire.

Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html.

=cut