#!/usr/bin/python
#needs at least python 1.5

#***************************************************************************
#		copyright            : (C) 2001 by McRee		   *
#		email                : mcree@freemail.hu		   *
#***************************************************************************
					     
#***************************************************************************
#*                                                                         *
#*   This program is free software; you can redistribute it and/or modify  *
#*   it under the terms of the GNU General Public License as published by  *
#*   the Free Software Foundation; either version 2 of the License, or     *
#*   (at your option) any later version.                                   *
#*                                                                         *
#***************************************************************************
									   

# statistics database: loaded at startup and rewritten at the end of the run
database = "/var/local/muddlestats.db"
# the muddleftpd logfile that gets analyzed
muddle_log = "/var/log/muddleftpd.log"

import re;
import pprint;
import cPickle;
from zlib import compress, decompress;
from time import *;

#print "This is MuddleStats";

# Many server threads log to the same logfile simultaneously; rows are told
# apart by this "user@host(ip)/number" thread id.
re_thread_id = re.compile(r': [a-zA-Z0-9]+@[a-zA-Z0-9._-]+\([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\)/[0-9]+ :')

# When the user logs out, all loglist data of that thread is dropped.
re_logout = re.compile(r'User logged out')

# When the server exits, the loglist data of all threads is dropped.
re_server_exit = re.compile(r'Info - Received SIG.*')

# ctime-style timestamp; must be present in every row, otherwise the log is
# considered malformed.
re_timestamp = re.compile(r'[A-Z][a-z]{2} [A-Z][a-z]{2} [ 0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}')

# "@hostname(ipnumber)" of a user
re_host = re.compile(r'@[a-zA-Z0-9._-]+\([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\)')

# ": username@"
re_user = re.compile(r': [a-zA-Z0-9]+@')

# "xxx bytes"
re_bytes = re.compile(r'[0-9]* bytes')

# A transfer ends either finished (226) or aborted (426).
re_finish = re.compile(r'.* 226 Transfer done. [0-9]+ bytes transferred|.* 426 Transfer aborted. [0-9]+ bytes transferred')

# retrieve command
re_retr = re.compile(r'^\+\+.*retrieve')

# store commands (store and append)
re_stor = re.compile(r'^\+\+.*store|^\+\+.*append')

# Module-level state; each may be replaced by the values stored in the database.
loglist = {}    # thread id -> {"RETRFLAG": 0/1, "STORFLAG": 0/1}
userlist = {}   # user -> host -> 'YYYY-MM-DD' -> {"RETR": bytes, "STOR": bytes}
oldstamp = ''   # timestamp stored by the previous run ('' when there is none)

def updatedb(s, mode = "RETR"):
    """Add the byte count found in logline 's' to the userlist database.

    s    -- a logline carrying a timestamp, a 'user@host(ip)' id and an
            'NNN bytes' counter
    mode -- the counter to increment: "RETR" or "STOR"

    The counters are kept in the tree
    userlist[user][host]['YYYY-MM-DD'] = {"RETR": n, "STOR": n}.
    """

    global userlist     # the statistics tree lives at module level

    # pick the interesting fields out of the logline
    user = re_user.search(s).group()[2:-1]      # drop leading ': ' and trailing '@'
    nbytes = re_bytes.search(s).group()[:-5]    # drop the trailing 'bytes'
    stamp = strptime(re_timestamp.search(s).group())
    host = re_host.search(s).group()[1:]        # drop the leading '@'
    day = "%02d-%02d-%02d" % (stamp[0], stamp[1], stamp[2])

    # walk down the tree, creating the missing levels on the way
    try:
        hosts = userlist[user]
    except KeyError:                # user seen for the first time
        hosts = userlist[user] = {}
    try:
        days = hosts[host]
    except KeyError:                # user never connected from this host
        days = hosts[host] = {}
    try:
        counters = days[day]
    except KeyError:                # user did not transfer anything on this day yet
        counters = days[day] = {"RETR": 0, "STOR": 0}

    # the 'real' update: increment the byte counter
    counters[mode] = long(counters[mode]) + long(nbytes)
    
# load database
# File layout: one human-readable header line, followed by a zlib-compressed
# pickle of [userlist, loglist, timestamp-of-last-processed-row].
# NOTE(review): unpickling executes whatever the file describes - the database
# file must only be writable by trusted users.
try:
    dbfile = open(database, "rb")       # binary mode: the payload is compressed data
    try:
        dbfile.readline()               # skip the header line
        saved = cPickle.loads(decompress(dbfile.read()))
    finally:
        dbfile.close()                  # close even if the payload is unreadable
    userlist = saved[0]
    loglist = saved[1]
    oldstamp = saved[2]
except IOError:
    pass    # no panic if file not found

# Walk through the whole logfile and account every finished transfer.
#
# Only rows newer than 'oldstamp' (the stamp stored by the previous run) change
# any state; older rows were already processed by an earlier run.  For each
# thread id, loglist remembers whether a retrieve/store command is pending, so
# that the following 'transfer done/aborted' row can be booked as RETR or STOR.
# Raises IOError when a row without a timestamp is found.
log = open(muddle_log, "r")             # open logfile

try:
    rows = 0
    while 1:                            # read and analyze all lines from logfile
        s = log.readline()
        if s == '':                     # readline() returns '' only at end of file
            break
        rows = rows + 1

        m = re_timestamp.search(s)
        if not m:                       # huh! every row must carry a timestamp
            print("Error in logfile at line " + str(rows) + ":")
            print(s)
            print("Invalid FTPD logfile format! - Aborting MuddleStats\n")
            raise IOError("invalid FTPD logfile format at line " + str(rows))
        timestamp = strptime(m.group())

        # Rows that are not 'younger' than the old stamp must not touch
        # anything - in particular an old server-exit row must not wipe the
        # thread state that was restored from the database.
        if not (timestamp > oldstamp):
            continue

        if re_server_exit.search(s):    # muddleftpd stopped. all users kicked
            loglist.clear()

        m = re_thread_id.search(s)
        if not m:                       # row does not belong to a user thread
            continue

        currt = m.group()               # get current thread
        flags = loglist.get(currt)
        if flags is None:
            flags = loglist[currt] = {"RETRFLAG": 0, "STORFLAG": 0}

        if re_retr.search(s):           # we got 'retrieve'
            flags["RETRFLAG"] = 1

        if re_stor.search(s):           # we got 'store'
            flags["STORFLAG"] = 1

        if re_finish.search(s):
            if flags["RETRFLAG"] == 1:      # 'transfer done' after 'retrieve'
                flags["RETRFLAG"] = 0
                updatedb(s, "RETR")
            elif flags["STORFLAG"] == 1:    # 'transfer done' after 'store'
                flags["STORFLAG"] = 0
                updatedb(s, "STOR")

        # drop the thread when the user logged out or nothing is pending
        if re_logout.search(s) or (flags["RETRFLAG"] == 0 and flags["STORFLAG"] == 0):
            del loglist[currt]
finally:
    log.close()                         # close the logfile on the abort path too

#pprint.pprint(loglist);
#pprint.pprint(userlist);

# save database
# 'timestamp' is only bound once at least one logline has been parsed.  With
# an empty logfile (e.g. right after log rotation) keep the previous stamp
# instead of failing halfway through the write.
try:
    newstamp = timestamp
except NameError:
    newstamp = oldstamp

# Build the complete payload before the file is opened: opening for writing
# truncates the old database, so nothing may fail between open and write.
payload = compress(cPickle.dumps([userlist, loglist, newstamp]))

dbfile = open(database, "wb")           # binary mode: the payload is compressed data
try:
    dbfile.write("MuddleStats datafile v2 - DO NOT MODIFY\n")
    dbfile.write(payload)
finally:
    dbfile.close()



# syntax highlighted by Code2HTML, v. 0.9.1