package Lire::Syslog;

use strict;

use Time::Local;

use Lire::Logger qw/ lr_debug lr_info lr_err /;
use Lire::Time qw/ syslog2cal clf2cal date2cal getMonthName /;

use vars qw/ $LTIME @syslog_flavours $debug /;

# Package-wide state: debugging is off by default and the local time is
# sampled once, when the module is compiled.
BEGIN {
    $debug = 0;
    $LTIME = [ localtime ];
}

# Table of the known syslog flavours: [ description, parser function ].
#
# The table is filled in outside of the BEGIN block because, as the
# original authors note, the parser functions must be defined first.
#
# Order matters: the first parser that succeeds wins.  When one format
# is a sub-format of another (e.g. classic BSD versus Solaris 8), the
# longer format has to come first.
@syslog_flavours = (
    [ "Solaris 8 syslog daemon"                   => \&parse_sol8_syslog ],
    [ "BSD syslog daemon"                         => \&parse_bsd_syslog ],
    [ "BSD timestamp-only syslog daemon"          => \&parse_bsd_timestamp_syslog ],
    [ "Netscape Messaging Server logging daemon"  => \&parse_nms_syslog ],
    [ "SunONE Messaging Server logging daemon"    => \&parse_s1ms_syslog ],
    [ "Unknown syslog daemon with year and code"  => \&parse_unknown_syslog ],
    [ "WebTrends syslog daemon"                   => \&parse_wt_syslog ],
    [ "Kiwi (ISO format) syslog daemon"           => \&parse_kiwi_iso_syslog ],

    # Rationale for trying the US format before the regular DD-MM format,
    # although this picks the wrong (US) format 40% of the time (144 days
    # out of 365): US citizens seem very fond of their format and non-US
    # users are more likely to pick the ISO format (this is purely
    # intuitive and not based on any evidence).  Anyway, more logs were
    # received in the US date format.
    [ "Kiwi (US format) syslog daemon"            => \&parse_kiwi_us_syslog ],
    [ "Kiwi (DD-MM-YYYY format) syslog daemon"    => \&parse_kiwi_ddmm_syslog ],
    [ "Sendmail Switch log daemon"                => \&parse_sendmail_switch_syslog ],
);

#------------------------------------------------------------------------
# Constructor new()
#
# Using one parser object returning record in hash is nearly 50% faster
# than using one object by syslog record.
#
# Testing on 22410 syslog lines
# Benchmark: timing 10 iterations of 1 line = 1 object, parser object...
#   1 line = 1 object: 24 wallclock secs (24.23 usr + 0.01 sys = 24.24 CPU) @ 0.41/s (n=10)
#   parser object: 17 wallclock secs (16.52 usr + 0.00 sys = 16.52 CPU) @ 0.61/s (n=10)
#                     s/iter 1 line = 1 object parser object
#   1 line = 1 object   2.42               --          -32%
#   parser object       1.65              47%            --
#
# Returns a new parser object.  Its 'syslog_parse_func' attribute starts
# out pointing at guess_syslog_format(), so the flavour is detected the
# first time a line is parsed.
sub new {
    my $self  = shift;
    my $class = ref($self) || $self;

    return bless { 'syslog_parse_func' => \&guess_syslog_format }, $class;
}

#------------------------------------------------------------------------
# Function debug( @msg )
#
# Logs @msg at debug level (through lr_debug) if debugging is enabled,
# i.e. when the package variable $debug is true.
sub debug(@) {
    lr_debug( __PACKAGE__, ": ", @_ ) if $debug;
}

#------------------------------------------------------------------------
# Function decode_priority( $priority )
#
# Determines the syslog facility and level from an encoded numeric
# priority (priority = facility * 8 + level).
#
# Returns an array ($facility, $level).
# Dies when the priority is outside of the 0..191 range.
#
my @levels = qw/emerg alert crit err warning notice info debug/;
my @facilities = qw/kern user mail daemon auth syslog lpr news
                    uucp cron authpriv ftp ntp authaudit authalert cron
                    local0 local1 local2 local3 local4 local5 local6 local7/;

sub decode_priority($) {
    my $priority = shift;

    die "encoded priority should be between 0 and 191: $priority\n"
      unless $priority >= 0 && $priority <= 191;

    # The three low bits hold the level, the remaining bits the facility.
    my $level_idx    = $priority % 8;
    my $facility_idx = ( $priority - $level_idx ) / 8;

    return ( $facilities[$facility_idx], $levels[$level_idx] );
}

#------------------------------------------------------------------------
# Function extract_process( $rec )
#
# This function will try to extract process and process id information
# from the message stored in $rec->{'content'}.
# It will also remove extra timestamp information from the message's
# content (often encountered with Cisco devices), these look like e.g.
#
#  Oct 31 09:00:53 192.168.0.223 83843: *Mar 8 17:58:46:
#    %SEC-6-IPACCESSLOGP: list ...
#
# On a match the 'process', 'pid' (may be undef) and 'content' keys of
# $rec are set; otherwise $rec is left untouched.
#
# It returns the (possibly modified) syslog structure.
sub extract_process {
    my ( $rec ) = @_;

    if ( $rec->{'content'} =~ m!^
         (?:\d+:\s)?                 # Sequence number
         (?:\d[wdhms\d]+:\s)?        # Another router info: 2w5h:
         # Remove extra timestamp as sometimes sent by network devices
         # MMM DD [YYYY] HH:MM:SS[.MSEC] [TZ]:
         # systems which are not NTP synced have a leading *
         # (Tnx anderson and Edwin Groothuis)
         (?:\*?\w\w\w\s+\d\d?\s(?:\d+\s+)?[\d:.]+(?:\s[a-z]{3})?:\s)?
         ([^:\[]+)                   # Process
         (?:\[(\d+)\])?:\s+          # Process ID; may be undef
         (.*)                        # Actual message
         $!x )
    {
        $rec->{'process'} = $1;
        $rec->{'pid'}     = $2;
        $rec->{'content'} = $3;
    }
    # Otherwise leave untouched

    return $rec;
}

#------------------------------------------------------------------------
# Method guess_syslog_format( $line )
#
# This function is called on the first line parsed by the object.
#
# It tries to match the line against the different syslog styles listed
# in @syslog_flavours and sets the syslog_parse_func attribute to the
# first parser that accepts the line.
#
# Subsequent parses go directly to that parser function.
#
# Returns the record produced by the matching parser.
sub guess_syslog_format {
    my ( $self, $line ) = @_;

    foreach my $flavour ( @syslog_flavours ) {
        my ( $name, $parser ) = @$flavour;

        # Try to parse each in turn
        # First that matches is the good one
        debug( "trying $name syslog flavour" );
        my $rec   = eval { $parser->( $self, $line ) };
        my $error = $@;    # capture right away, before anything can reset it
        if ( $error ) {
            debug( $error );
            next;
        }

        lr_info( __PACKAGE__, ": log seems from $name" );
        $self->{'syslog_parse_func'} = $parser;
        return $rec;
    }

    # This will abort the program.
    # NOTE(review): the original statement here was damaged in this copy of
    # the file (only "eval { lr_err( <" survives).  The message below is a
    # reconstruction, not the original wording -- confirm against upstream.
    lr_err( join( "",
                  __PACKAGE__, ": cannot guess the syslog flavour of this log.",
                  " Supported flavours are:\n",
                  map { "  - $_->[0]\n" } @syslog_flavours ) );

    # Safety net in case lr_err() returns instead of aborting.
    die "unknown syslog flavour\n";
}

#------------------------------------------------------------------------
# Method parse_bsd_syslog( $line )
#
# Parse messages in the classic BSD syslog format:
#
# MMM DD HH:MM:SS Hostname Message
#
# NOTE(review): the head of this function (signature and regex) was lost
# in this copy of the file; only the extract_process() call survives.
# The regex is reconstructed from parse_sol8_syslog, of which the classic
# format is described as a sub-format -- confirm against upstream.
sub parse_bsd_syslog {
    my ( $month, $day, $time, $hostname, $content ) =
      ( $_[1] =~ m/^
          \s*
          (\w\w\w)\s+               # Month
          (\d\d?)\s+                # Day
          ([\d:.]+)\s+              # Time
          ([\w.-]+)\s               # Hostname
          (.*)                      # Message
          $/x )
      or die "invalid BSD syslog line: $_[1]\n";

    extract_process( {
        'timestamp' => syslog2cal( $month, $day, $time, $LTIME ),
        'hostname'  => $hostname,
        'content'   => $content,
    } );
}

#------------------------------------------------------------------------
# Method parse_nms_syslog( $line )
#
# Parse messages log through the logging daemon that comes with
# Netscape Messaging Server
#
# [01/Nov/2001:08:27:44 +0100] titan popd[2864]: General Information: ...
#
# Returns a hash reference with the timestamp, hostname, process, pid,
# facility, level and content keys.  Dies when the line doesn't match.
sub parse_nms_syslog {
    my ( $date, $hostname, $process, $pid, $facility, $level, $content ) =
      ( $_[1] =~ m/^
          \s*
          (\[.*?\])\s+              # Date in common log format
          ([\w.-]+)\s               # Hostname
          ([^:\[]+)                 # Process
          (?:\[(\d+)\])?:\s+        # Process ID; may be undef
          (\w+)\s+                  # Facility
          (\w+):\s+                 # Level
          (.*)                      # Message
          $/x )
      or die "invalid Netscape Messaging Server syslog line: $_[1]\n";

    return {
        'timestamp' => clf2cal( $date ),
        'hostname'  => $hostname,
        'process'   => $process,
        'content'   => $content,
        'pid'       => $pid,
        'facility'  => $facility,
        'level'     => $level,
    };
}

#------------------------------------------------------------------------
# Method parse_s1ms_syslog( $line )
#
# Parse messages log through the logging daemon that comes with
# SunONE Messaging Server
#
# 18-Nov-2002 11:53:36.49 72bb.3.1 tcp_local    ims-ms       E 1 ...
# 18-Nov-2002 12:37:42.79 72bb.0b.4 tcp_intranet tcp_local    E 1 ...
# 18-Nov-2002 13:30:17.82 7304.2.1 tcp_local                 D 2 ...
# 18-Nov-2002 15:29:11.31 72bb.8.13 tcp_intranet              J 0 ...
#
# Returns a hash reference with the timestamp, pid, count, in_chan,
# out_chan, entry and content keys.  Dies when the line doesn't match.
#
# The timestamp is built with date2cal() from the year found in the line;
# the year is no longer guessed from the current local time.  The
# fractional seconds are matched but not kept.
sub parse_s1ms_syslog {
    my ( $day, $month, $year, $time, $pid, $count,
         $in_chan, $out_chan, $entry, $content ) =
      ( $_[1] =~ m/^(\d{2})-([A-Z][a-z]{2})-(\d{4})\s+  # Date in format DD-mmm-YYYY
          (\d{2}:\d{2}:\d{2})\.\d+\s+       # Time in format hh:mm:ss.ms
          ([0-9a-f]+)                       # pid in hexa
          (\.[0-9a-f]+\.\d+)\s+             # count (in hexa) and thread
          ([a-z_ -]{12})\s                  # source channel
          ([a-z_ +-]{12})\s                 # destination channel
          ([A-Z]{1,3})\s+                   # type of entry
          (.*)                              # Message
          $/x )
      or die "invalid SunONE Messaging Server syslog line: $_[1]\n";

    # The channel columns are fixed-width and space padded.
    $in_chan  =~ s/ +//g;
    $out_chan =~ s/ +//g;

    return {
        'timestamp' => date2cal( $year, $month, $day, $time ),
        'pid'       => $pid,
        'count'     => $count,
        'in_chan'   => $in_chan,
        'out_chan'  => $out_chan,
        'entry'     => $entry,
        'content'   => $content,
    };
}

#------------------------------------------------------------------------
# Method parse_sol8_syslog ($line)
#
# Parse Solaris 8 syslog daemon messages:
#
# Nov  7 09:00:49 imrldmz1 sendmail[29681]: [ID 801593 mail.info] ...
sub parse_sol8_syslog {
    # Returns a hash reference with the timestamp, hostname, process, pid,
    # identifier, facility, level and content keys; dies on a mismatch.
    my ( $month, $day, $time, $hostname, $process, $pid, $id,
         $facility, $level, $content ) =
      ( $_[1] =~ m/^
          \s*
          (\w\w\w)\s+               # Month
          (\d\d?)\s+                # Day
          ([\d:.]+)\s+              # Time
          ([\w.-]+)\s               # Hostname
          ([^:\[]+)                 # Process
          (?:\[(\d+)\])?:\s         # Process ID; may be undef
          \[ID\s                    # Special Solaris 8 identifier
          (\d+)\s                   # Identifier, whatever that means
          ([a-z0-9]+)\.([a-z]+)     # Facility.level
          \]\s+
          (.*)                      # Message
          $/x )
      or die "invalid Solaris 8 syslog line: $_[1]\n";

    return {
        'timestamp'  => syslog2cal( $month, $day, $time, $LTIME ),
        'hostname'   => $hostname,
        'process'    => $process,
        'content'    => $content,
        'pid'        => $pid,
        'identifier' => $id,
        'facility'   => $facility,
        'level'      => $level,
    };
}

#------------------------------------------------------------------------
# Method parse_sendmail_switch_syslog( $line )
#
# Parse messages coming from the logging daemon that
# comes with Sendmail Switch Pro (as found on WinTel platforms)
#
# 04/16/02 00:00:03 sendmail(2716): INFO:
#
# Returns a hash reference with the timestamp, hostname, process, pid,
# level (lower-cased) and content keys; dies on a mismatch.
#
# This format carries no hostname: the 'hostname' key is kept for
# compatibility but is always undef.  (The capture list used to contain
# a $hostname variable that had no matching group in the regex, which
# shifted every following field by one.)
sub parse_sendmail_switch_syslog {
    my ( $month, $day, $year, $time, $process, $pid, $level, $content ) =
      ( $_[1] =~ m!^
          \s*
          (\d+)/(\d+)/(\d+)\s+      # MM/DD/YY
          ([\d:.]+)\s+              # Time
          ([^:()]+)                 # Process
          (?:\((\d+)\))?:\s+        # Process ID; may be undef
          (\w+):\s+                 # Level
          (.*)                      # Message
          $!x )
      or die "invalid Sendmail Switch syslog line: $_[1]\n";

    # Two-digit years are relative to 2000; leave a full year alone.
    $year += 2000 if $year < 100;

    return {
        'timestamp' => date2cal( $year, getMonthName( $month - 1 ), $day, $time ),
        'hostname'  => undef,
        'process'   => $process,
        'pid'       => $pid,
        'level'     => lc $level,
        'content'   => $content,
    };
}

#------------------------------------------------------------------------
# Method parse_bsd_timestamp_syslog ($line)
#
# Parse messages which only contains the BSD timestamp:
#
# Jul 22 00:01:14: sendmail[1204]:
#
# Returns the record after extract_process() handled its content;
# dies on a mismatch.
sub parse_bsd_timestamp_syslog {
    my ( $month, $day, $time, $content ) =
      ( $_[1] =~ m/^
          \s*
          (\w\w\w)\s+               # Month
          (\d\d?)\s                 # Day
          ([\d:.]+)\s               # Time
          (.*)                      # Message
          $/x )
      or die "invalid BSD timestamp syslog line: $_[1]\n";

    extract_process( {
        'timestamp' => syslog2cal( $month, $day, $time, $LTIME ),
        'content'   => $content,
    } );
}

#------------------------------------------------------------------------
# Method parse_unknown_syslog ($line)
#
# Parse messages from rfc 3164-compliant syslog daemons that keep the encoded
# facility and has a year:
#
# <166>Jul 22 2002 00:01:14: %PIX-6-302001:
#
# Returns the record (timestamp, content, facility, level) after
# extract_process() handled its content; dies on a mismatch or when the
# encoded priority is out of range.
#
# The timestamp is built with date2cal() from the year found in the line;
# the year is no longer guessed from the current local time.
sub parse_unknown_syslog {
    my ( $priority, $month, $day, $year, $time, $content ) =
      ( $_[1] =~ m/^
          \s*
          <(\d+)>                   # Facility and level encoded
          (\w\w\w)\s+               # Month
          (\d\d?)\s                 # Day
          (\d\d\d\d)\s              # Year
          ([\d:.]+)\s               # Time
          (.*)                      # Message
          $/x )
      or die "invalid unknown syslog with year and code line: $_[1]\n";

    my ( $facility, $level ) = decode_priority( $priority );

    # The time pattern also swallows the colon that ends the timestamp
    # in the example above; drop it before handing the time to date2cal().
    ( my $clock = $time ) =~ s/:$//;

    extract_process( {
        'timestamp' => date2cal( $year, $month, $day, $clock ),
        'content'   => $content,
        'facility'  => $facility,
        'level'     => $level,
    } );
}

#------------------------------------------------------------------------
# Method parse_wt_syslog( $line )
#
# Parses syslog message from WebTrends syslog daemon
#
# Example: WTsyslog[2002-08-23 12:23:55 ip=10.161.2.250 pri=6] <189>49288: Aug 23 12:39:31.427 est: %SYS-5-CONFIG_I:
#
# Returns the record (timestamp, hostname, content, facility, level)
# after extract_process() handled its content; dies on a mismatch.
#
# The <NNN> encoded priority is optional.  When it is missing, 'facility'
# and 'level' are undef; an absent priority used to be decoded as 0,
# i.e. reported as kern.emerg.
sub parse_wt_syslog {
    my ( $year, $month, $day, $time, $hostname, $wtpriority,
         $priority, $content ) =
      ( $_[1] =~ m/^
          \s*
          WTsyslog\[(\d\d\d\d)-     # Year
          (\d\d)-                   # Month
          (\d\d)\s                  # Day
          ([\d:.]+)\s               # Time
          ip=([\d.]+)\s             # Hostname
          pri=(\d+)\]\s             # WT Priority
          (?:<(\d+)>)?              # Encoded priority
          (.*)                      # Message
          $/x )
      or die "invalid WebTrends syslog line: $_[1]\n";

    my ( $facility, $level );
    ( $facility, $level ) = decode_priority( $priority )
      if defined $priority;

    extract_process( {
        'timestamp' => date2cal( $year, getMonthName( $month - 1 ), $day, $time ),
        'hostname'  => $hostname,
        'content'   => $content,
        'facility'  => $facility,
        'level'     => $level,
    } );
}

#------------------------------------------------------------------------
# Method parse_kiwi_iso_syslog( $line )
#
# Parses syslog message from Kiwi syslog daemon in ISO format
#
# Kiwi format ISO yyyy-mm-dd (Tab delimited)
# Format: DateTime (YYYY-MM-DD HH:MM:SS) [TAB] Level (Facility.Level) [TAB] Host name [TAB] Message text
#
# Example: 2002-07-22 12:34:56 [TAB] Local5.Debug [TAB] firewall-inside [TAB] prot=UDPf port=53 dst=203.25.36.47 src=192.168.1.2 bytes=64
#
# Returns the record (timestamp, hostname, content, facility, level)
# after extract_process() handled its content; dies on a mismatch.
sub parse_kiwi_iso_syslog {
    my ( $year, $month, $day, $time, $facility, $level,
         $hostname, $content ) =
      ( $_[1] =~ m/^
          \s*
          (\d\d\d\d)-               # Year
          (\d\d)-                   # Month
          (\d\d)\s                  # Day
          ([\d:.]+)\t               # Time
          (\w+)\.                   # Facility
          (\w+)\t                   # Level
          ([\w.-]+)\t               # Hostname
          (.*)                      # Message
          $/x )
      or die "invalid Kiwi (ISO format) syslog line: $_[1]\n";

    extract_process( {
        'timestamp' => date2cal( $year, getMonthName( $month - 1 ), $day, $time ),
        'hostname'  => $hostname,
        'content'   => $content,
        'facility'  => $facility,
        'level'     => $level,
    } );
}

#------------------------------------------------------------------------
# Method parse_kiwi_us_syslog( $line )
#
# Parses syslog message from Kiwi syslog daemon in US format
#
# Kiwi format US mm-dd-yyyy (Tab delimited)
# Format: DateTime (MM-DD-YYYY HH:MM:SS) [TAB] Level (Facility.Level) [TAB] Host name [TAB] Message text
#
# Example: 07-22-2002 12:34:56 [TAB] Local5.Debug [TAB] firewall-inside [TAB] prot=UDPf port=53 dst=203.25.36.47 src=192.168.1.2 bytes=64
#
# Returns the record (timestamp, hostname, content, facility, level)
# after extract_process() handled its content.  Dies on a mismatch or
# when the first date field cannot be a month.
sub parse_kiwi_us_syslog {
    my ( $month, $day, $year, $time, $facility, $level,
         $hostname, $content ) =
      ( $_[1] =~ m/^
          \s*
          (\d\d)-                   # Month
          (\d\d)-                   # Day
          (\d\d\d\d)\s              # Year
          ([\d:.]+)\t               # Time
          (\w+)\.                   # Facility
          (\w+)\t                   # Level
          ([\w.-]+)\t               # Hostname
          (.*)                      # Message
          $/x )
      or die "invalid Kiwi (US format) syslog line: $_[1]\n";

    # A "month" above 12 means the line is really in DD-MM-YYYY format.
    die "not a MM-DD-YYYY date: $month-$day-$year\n"
      unless $month >= 1 && $month <= 12;

    extract_process( {
        'timestamp' => date2cal( $year, getMonthName( $month - 1 ), $day, $time ),
        'hostname'  => $hostname,
        'content'   => $content,
        'facility'  => $facility,
        'level'     => $level,
    } );
}

#------------------------------------------------------------------------
# Method parse_kiwi_ddmm_syslog( $line )
#
# Parses syslog message from Kiwi syslog daemon in the DD-MM-YYYY date format.
#
# Kiwi format dd-mm-yyyy (Tab delimited)
# Format: DateTime (DD-MM-YYYY HH:MM:SS) [TAB] Level (Facility.Level) [TAB] Host name [TAB] Message text
#
# Example: 22-07-2002 12:34:56 [TAB] Local5.Debug [TAB] firewall-inside [TAB] prot=UDPf port=53 dst=203.25.36.47 src=192.168.1.2 bytes=64
#
# Returns the record (timestamp, hostname, content, facility, level)
# after extract_process() handled its content.  Dies on a mismatch or
# when the second date field cannot be a month.
sub parse_kiwi_ddmm_syslog {
    my ( $day, $month, $year, $time, $facility, $level,
         $hostname, $content ) =
      ( $_[1] =~ m/^
          \s*
          (\d\d)-                   # Day
          (\d\d)-                   # Month
          (\d\d\d\d)\s              # Year
          ([\d:.]+)\t               # Time
          (\w+)\.                   # Facility
          (\w+)\t                   # Level
          ([\w.-]+)\t               # Hostname
          (.*)                      # Message
          $/x )
      or die "invalid Kiwi (DD-MM-YYYY format) syslog line: $_[1]\n";

    die "not a DD-MM-YYYY date: $day-$month-$year\n"
      unless $month >= 1 && $month <= 12;

    extract_process( {
        'timestamp' => date2cal( $year, getMonthName( $month - 1 ), $day, $time ),
        'hostname'  => $hostname,
        'content'   => $content,
        'facility'  => $facility,
        'level'     => $level,
    } );
}

#------------------------------------------------------------------------
# Method parse($line)
#
# Delegate to the appropriate syslog parse function: the one stored in
# the object's 'syslog_parse_func' attribute, called with the same
# arguments (object and line).  Returns whatever that function returns.
sub parse {
    return $_[0]->{'syslog_parse_func'}->( @_ );
}

1;

__END__

=pod

=head1 NAME

Lire::Syslog - syslog style lines parser

=head1 SYNOPSIS

    use Lire::Syslog;

    my $parser = Lire::Syslog->new;

    my $rec = $parser->parse( $line );

=head1 DESCRIPTION

This module defines objects able to parse logs coming from several
flavours of logging daemon.
It currently supports the following syslog file formats:

=over

=item Classic BSD syslog daemon

The "classic" BSD syslog format:

    MMM DD HH:MM:SS Hostname Message

=item Solaris 8 syslog daemon

The Solaris 8 syslog daemon also includes the facility and level:

    MMM DD HH:MM:SS Hostname Process[Pid]: [ID DDDDDD Facility.Level] Message

=item Netscape Messaging Server logging daemon

The syslog daemon that comes with Netscape Messaging Server uses a
date in common log format:

    [DD/MMM/YYYY:HH:MM:SS +ZZZZ] Hostname Process[Pid]: Facility Level: Message

=item WebTrends syslog daemon

The format used by the syslog daemon that comes with WebTrends:

    WTsyslog[YYYY-MM-DD HH:MM:SS ip=HOSTNAME pri=WT_PRIORITY] Message

=item Kiwi Syslog (ISO date format)

The ISO log file format used by the Kiwi Syslog daemon
(http://www.kiwisyslog.com/info_sysd.htm), a logging daemon often
encountered on Win32 platforms:

    YYYY-MM-DD HH:MM:SS [TAB] Facility.Level [TAB] Hostname [TAB] Message

=item Kiwi Syslog (US date format)

The US date format used by the Kiwi Syslog daemon:

    MM-DD-YYYY HH:MM:SS [TAB] Facility.Level [TAB] Hostname [TAB] Message

=item Kiwi Syslog (DD-MM-YYYY date format)

The DD-MM-YYYY date format used by the Kiwi Syslog daemon:

    DD-MM-YYYY HH:MM:SS [TAB] Facility.Level [TAB] Hostname [TAB] Message

=item Sendmail Switch logging daemon

The format used by the logging daemon coming with Sendmail Switch on
Win32 platforms:

    MM/DD/YY HH:MM:SS Process(Pid): Level: Message

=item RFC 3164-compliant Syslog daemon

A format from RFC 3164-compliant Syslog daemons which includes the
encoded priority and the year in the date. RFC 3164 defines the "BSD
Syslog Protocol".

    <PRI>MMM DD YYYY HH:MM:SS: Process[Pid]: Message

=back

The first time the parse() method is used, the parser will try each of
the supported formats to detect the syslog format. If no format
matches, the module will call lr_err() and abort the program. Every
subsequent parse() invocation will use the same format.

The parse() method will return a hash reference which contains the
following keys:

=over

=item timestamp

The timestamp of the event.

=item hostname

The name or IP address of the host that sent the message.

=item process

The "process" that logged the event. Formally, the syslog message
doesn't contain a process field but it's usually the first word coming
before a colon in the message's content.

=item pid

The PID of the process that logged the event. This is usually what is
between [] in the process part of the message.

=item identifier

This key is only present when the log comes from a Solaris 8 syslog
daemon. It contains the identifier that comes after ID in the message.

=item facility

The syslog facility (kern, mail, local0, etc.) of the message. This
isn't supported in all file formats so this key might be unavailable.

=item level

The syslog level (emerg, info, notice, etc.) of the message. This
isn't supported in all file formats so this key might be unavailable.

=item content

The actual syslog message (with the process and pid removed). Many
network devices will also have another BSD-style timestamp at the
beginning of the message. If present, it will also be removed.

=back

=head1 USAGE

    package Lire::Foo;

    use base qw/ Lire::Syslog /;

    sub parse {
        my $self = shift;
        my $line = shift;

        # this runs parse from Lire::Syslog, setting keys like
        # 'timestamp', 'process' and 'hostname'
        my $rec = $self->SUPER::parse($line);

        $rec->{'foo'} = dosomethingwith( $rec->{'content'} );

        return $rec
    }

Now, one can run in a script

    my $parser = new Lire::Foo();

    while ( <> ) {
        chomp;
        my $log = $parser->parse( $_ );
    }

which sets $log->{'timestamp'}, ... $log->{'process'} and $log->{'foo'}.

=head1 SEE ALSO

Lire::Email(3)

=head1 AUTHORS

Joost van Baal, Francis J. Lacoste. Initial idea by Joost Kooij

=head1 VERSION

$Id: Syslog.pm,v 1.15 2006/07/23 13:16:30 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2000-2002 Stichting LogReport Foundation LogReport@LogReport.org

This file is part of Lire.

Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html.

=cut