/*
 * FISG - Logfile parser
 * Programmed and designed by Matti 'ccr' Hamalainen
 * (C) Copyright 2003-2004 Tecnic Software productions (TNSP)
 *
 * Please read file 'COPYING' for information on license and distribution.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "fisg.h"
#include "th_util.h"
#include "th_config.h"
#include "th_string.h"
#include "in_formats.h"


/*
 * Parsing functions
 */
/*
 * Parse an unsigned decimal number from inLine, starting at index *linePos.
 *
 * At most iLen digits are consumed and *linePos is advanced past every
 * digit that was used. Returns the parsed value; when the character at
 * *linePos is not a digit (or iLen is 0), nothing is consumed and 0 is
 * returned.
 */
int fisg_parse_int(char *inLine, size_t iLen, size_t *linePos)
{
	int value = 0;
	size_t digitsLeft = iLen;

	for (;;) {
		/* Stop on the first non-digit ... */
		if (!th_isdigit(inLine[*linePos]))
			break;

		/* ... or when the digit budget is exhausted */
		if (digitsLeft == 0)
			break;
		digitsLeft--;

		value = (value * 10) + (inLine[*linePos] - '0');
		(*linePos)++;
	}

	return value;
}


/*
 * Return the user record associated with a nick, creating one if needed.
 *
 * If newNick is already present in pStats->nickList, the use-count of
 * that nick node is increased and the record stored in the node is
 * returned. Otherwise a new user and a new nick node are created; the
 * node is inserted into pStats->nickList and the user into
 * pStats->usersList.
 *
 * Returns NULL if an argument is NULL, if the user or the nick node
 * could not be created, or if inserting the node into the nick hash
 * failed. Nothing created by this call is left behind in those cases.
 */
t_user_entry *fisg_parse_user(t_stats *pStats, char *newNick)
{
	t_user_entry *tmpUser;
	t_str_node *tmpNick;

	if (!pStats || !newNick)
		return NULL;

	/* Check if nick matches existing user record */
	tmpNick = nickhash_search(pStats->nickList, newNick);
	if (tmpNick) {
		/* Yes, increase number of uses */
		tmpNick->nUsed++;
		return tmpNick->pData;
	}

	/* No, we need to create a new one */
	tmpUser = user_new(newNick);
	if (!tmpUser) {
		THERR("user_new() failed, nick: '%s'\n", newNick);
		return NULL;
	}

	tmpNick = th_strnode_new(newNick, 1, tmpUser);
	if (!tmpNick) {
		/* Do not leak the freshly created user */
		THERR("th_strnode_new() failed, nick: '%s'\n", newNick);
		user_free(tmpUser);
		return NULL;
	}

	/* Insert into nicklist */
	if (nickhash_insert(pStats->nickList, tmpNick) != 0) {
		/* Failed, due to hash */
		THERR("nickhash_insert() failed, hash: '%s'\n", newNick);
		user_free(tmpUser);
		th_strnode_free(tmpNick);
		return NULL;
	}

	/* Insert into userlist */
	user_insert(pStats->usersList, tmpUser);
	return tmpUser;
}


/*
 * Match one log line against a format-string and collect the results
 * into lineInfo.
 *
 * Ordinary characters of fmt must match inLine exactly. A '%' starts a
 * directive:
 *   %?        any single character
 *   %*        skip characters up to the next format character
 *   %@        optional '@', '+' or whitespace (irssi style nick prefix)
 *   %H %M %S  hours, minutes, seconds (at most 2 digits each)
 *   %Y        year (at most 4 digits)
 *   %y        2-digit year; values below 70 map to 20xx, others to 19xx
 *   %d %j     day and month (at most 2 digits each)
 *   %n %N     nick, resolved into lineInfo->pUser / lineInfo->pUser2
 *   %m %c     mode / channel; skipped up to the next format character
 *   %t        text, copied into lineInfo->pText (at most SET_MAX_BUF chars)
 *
 * lineInfo->pUser and lineInfo->pUser2 are reset to NULL and
 * lineInfo->pText to the empty string before matching, so callers never
 * read indeterminate values for directives the format does not contain.
 * Timestamp fields are only written by their own directives.
 * NOTE(review): the pText reset assumes pText is an array member of
 * t_lineinfo (no caller in this file allocates it) - confirm in fisg.h.
 *
 * Users are looked up / created only after the whole line has matched,
 * so non-matching lines never register nicks.
 *
 * Returns 0 on a successful match, 1 if the line does not match or a
 * user record could not be obtained, and -1 on NULL arguments or a
 * syntax error in the format-string.
 */
int fisg_parse_generic(char *inLine, char *fmt, t_lineinfo *lineInfo, t_stats *pStats)
{
	size_t linePos = 0, i;
	int iYear;
	BOOL isOK = TRUE, isEnd, tmpNick1S = FALSE, tmpNick2S = FALSE;
	char	tmpStr[SET_MAX_NICKLEN + 1] = "",
		tmpNick1[SET_MAX_NICKLEN + 1] = "",
		tmpNick2[SET_MAX_NICKLEN + 1] = "",
		tmpDest, c;

	if (!inLine || !fmt || !lineInfo) return -1;

	/* Give the outputs defined values */
	lineInfo->pUser = NULL;
	lineInfo->pUser2 = NULL;
	lineInfo->pText[0] = 0;

	/* Parse the line via format-string */
	while (*fmt && isOK) {
		if (*fmt != '%') {
			/* Literal character, must match exactly */
			if (*fmt != inLine[linePos])
				isOK = FALSE;

			fmt++;
			linePos++;
			continue;
		}

		switch (*(++fmt)) {
		/* Generic matching */
		case '?':
			/* Match anything */
			fmt++;
			if (inLine[linePos])
				linePos++;
			else
				isOK = FALSE;
			break;

		case '*':
			/* Match anything until next char */
			fmt++;
			while (inLine[linePos] && (inLine[linePos] != *fmt)) linePos++;
			break;

		case '@':
			/* Match irssi style optional '@|+| ' */
			fmt++;
			if (!inLine[linePos]) isOK = FALSE;
			if ((inLine[linePos] == '@') ||
				(inLine[linePos] == '+') ||
				th_isspace(inLine[linePos]))
				linePos++;
			break;

		/* Timestamps */
		case 'H': lineInfo->ts.iHours = fisg_parse_int(inLine, 2, &linePos); fmt++; break;
		case 'M': lineInfo->ts.iMinutes = fisg_parse_int(inLine, 2, &linePos); fmt++; break;
		case 'S': lineInfo->ts.iSeconds = fisg_parse_int(inLine, 2, &linePos); fmt++; break;

		case 'Y': lineInfo->ts.iYear = fisg_parse_int(inLine, 4, &linePos); fmt++; break;

		case 'y':
			/* 2-digit year, pivot at 70 */
			iYear = fisg_parse_int(inLine, 2, &linePos);
			if (iYear < 70)
				iYear += 2000;
			else
				iYear += 1900;

			lineInfo->ts.iYear = iYear;
			fmt++;
			break;

		case 'd': lineInfo->ts.iDay = fisg_parse_int(inLine, 2, &linePos); fmt++; break;
		case 'j': lineInfo->ts.iMonth = fisg_parse_int(inLine, 2, &linePos); fmt++; break;

		/* Special matches */
		case 'n':
		case 'N':
			/* Nick */
			tmpDest = *fmt;
			fmt++;

			/* Find the start of the nick */
			th_findnext(inLine, &linePos);

			/* Get the nick to temp buffer */
			i = 0;
			isEnd = FALSE;

			/* A nick must start with a letter or a special character */
			c = inLine[linePos];
			if (!th_isalpha(c) && !th_isspecial(c)) isOK = FALSE;

			while (isOK && !isEnd) {
				c = inLine[linePos];
				if (!c || (c == *fmt) || th_isspace(c) || (i >= SET_MAX_NICKLEN))
					isEnd = TRUE;
				else if (th_isalpha(c) || th_isdigit(c) || th_isspecial(c) || (c == '-'))
					tmpStr[i++] = inLine[linePos++];
				else
					isOK = FALSE;
			}

			/* Terminate without bumping i, so i stays the nick length */
			tmpStr[i] = 0;

			/* The nick must be followed (after optional spaces) by the next format char */
			while (inLine[linePos] && th_isspace(inLine[linePos]) && (inLine[linePos] != *fmt)) linePos++;
			if (inLine[linePos] != *fmt) isOK = FALSE;

			/* An empty nick is not a match */
			if (i == 0) isOK = FALSE;

			/* Remember the nick; the user is resolved after the whole line matched */
			if (isOK) {
				if (tmpDest == 'n') {
					tmpNick1S = TRUE;
					strcpy(tmpNick1, tmpStr);
				} else {
					tmpNick2S = TRUE;
					strcpy(tmpNick2, tmpStr);
				}
			}
			break;

		case 'm':
			/* Mode */
			fmt++;
			while (inLine[linePos] && (inLine[linePos] != *fmt)) linePos++;
			break;

		case 'c':
			/* Channel */
			fmt++;
			while (inLine[linePos] && (inLine[linePos] != *fmt)) linePos++;
			break;

		case 't':
			/* Text */
			fmt++;
			i = 0;
			while (inLine[linePos] && (inLine[linePos] != *fmt) && (i < SET_MAX_BUF))
				lineInfo->pText[i++] = inLine[linePos++];

			lineInfo->pText[i] = 0;
			break;

		/* Error */
		default:
			THERR("Syntax error in format-string '%s'\n", fmt);
			return -1;
		}
	} /* while(*fmt) */

	/* Find users or add new ones; a failed lookup makes the whole parse fail */
	if (isOK && tmpNick1S) {
		lineInfo->pUser = fisg_parse_user(pStats, tmpNick1);
		if (!lineInfo->pUser) isOK = FALSE;
	}

	if (isOK && tmpNick2S) {
		lineInfo->pUser2 = fisg_parse_user(pStats, tmpNick2);
		if (!lineInfo->pUser2) isOK = FALSE;
	}

	return !isOK;
}


/*
 * Record one URL said by a user.
 *
 * inLine points at the text right after the scheme prefix (the caller
 * skips "http://"). Characters are collected while they are
 * alphanumeric or one of ".,/-~?&%_=;:^[]()#", up to SET_MAX_BUF
 * characters. If the URL is followed by whitespace or by the end of the
 * text, trailing '.' and ',' characters are treated as sentence
 * punctuation and removed (the first character is never removed).
 *
 * The resulting string is looked up in pStats->urlList; an existing
 * entry gets its use-count increased, otherwise a new entry is added.
 * pUser->nURLs is increased for every non-empty URL. If nothing usable
 * follows the prefix, the call has no effect.
 *
 * pUser must not be NULL. pCfg is currently unused.
 */
void fisg_parse_url(char *inLine, t_user_entry *pUser, t_stats *pStats, t_fisgconfig *pCfg)
{
	int urlLen = 0, lastPos;
	char c, urlStr[SET_MAX_BUF + 1];
	t_str_node *tmpS;
	assert(pUser);

	(void) pCfg;

	/* Get the text of the URL */
	while ((urlLen < SET_MAX_BUF) && ((c = *inLine) != 0) &&
		(th_isalnum(c) || (strchr(".,/-~?&%_=;:^[]()#", c) != NULL)))
		urlStr[urlLen++] = *(inLine++);

	urlStr[urlLen] = 0;

	/* A bare prefix with nothing after it is not an URL */
	if (urlLen == 0)
		return;

	/* Delete non-relevant trailing punctuation */
	if (!*inLine || th_isspace(*inLine)) {
		lastPos = urlLen - 1;
		while ((lastPos > 0) &&
			((urlStr[lastPos] == ',') || (urlStr[lastPos] == '.')))
			urlStr[lastPos--] = 0;
	}

	/* Add the URL in list */
	tmpS = th_strhash_search(pStats->urlList, urlStr, FALSE);
	if (tmpS) {
		/* Increase number of references */
		tmpS->nUsed++;
	} else {
		/* Add a new string */
		tmpS = th_strnode_new(urlStr, 1, NULL);
		if (tmpS)
			th_strhash_insert(pStats->urlList, tmpS, FALSE);
		else
			THERR("th_strnode_new() failed, URL: '%s'\n", urlStr);
	}

	/* Increase number of URLs said by user */
	pUser->nURLs++;
}


/*
 * Parse a line of public speech (also used for actions and notices) and
 * update the speaker's statistics.
 *
 * The line is matched against fmt with fisg_parse_generic(). On a match
 * the text part is analysed: the first "http://" URL is recorded, words,
 * characters and capitals are counted, smileys adjust fHappiness / nLove,
 * and lines containing '!' or '?' count once as yelling / question.
 * If the parsed hour is within [0, SET_HOURS_DAY) the per-hour counters
 * are updated and the line may be stored as the user's comment, provided
 * its length lies within pCfg->commentMinLength..commentMaxLength. An
 * existing comment is replaced with a probability of about one third.
 *
 * Returns the speaking user, or NULL if fmt is NULL, the line does not
 * match, or the format produced no user (no %n directive).
 */
t_user_entry *fisg_parse_public(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg)
{
	t_lineinfo lineInfo;
	t_user_entry *pUser;
	t_uint	nWords, nQuestions, nYelling;
	char *tmpStr;
	size_t textLen;
	int linePos;
	BOOL isWord;

	if (!fmt) return NULL;

	/* Start from defined values; the parser only fills what fmt contains */
	memset(&lineInfo, 0, sizeof(lineInfo));
	lineInfo.pUser = NULL;
	lineInfo.pUser2 = NULL;
	lineInfo.ts.iHours = -1;	/* "no hour": skips the per-hour stats below */

	/* Try to parse the line */
	if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats))
		return NULL;

	/* Without a speaker there is nothing to account the line to */
	pUser = lineInfo.pUser;
	if (!pUser)
		return NULL;

	/* If the text is empty, we don't need to analyze it */
	if (!lineInfo.pText[0])
		return pUser;

	/* Detect HTTP-URLs */
	tmpStr = strstr(lineInfo.pText, "http://");
	if (tmpStr) {
		tmpStr += strlen("http://");
		fisg_parse_url(tmpStr, pUser, pStats, pCfg);
	}

	/* Statisticize the actual public message-line */
	isWord = FALSE;
	nQuestions = nYelling = nWords = 0;

	for (linePos = 0; lineInfo.pText[linePos]; linePos++) {
		if (isWord && th_isspace(lineInfo.pText[linePos])) {
			/* End of a word */
			nWords++;
			isWord = FALSE;
		} else if ((!isWord) && !th_isspace(lineInfo.pText[linePos])) {
			/* Start of a word: check whether it begins with a smiley.
			 * Reading [linePos + 1] is safe, at worst it is the terminator.
			 */
			isWord = TRUE;

			switch (lineInfo.pText[linePos]) {
			case '=':
			case ':':
			case ';':
				switch (lineInfo.pText[linePos + 1]) {
				case ')': /* :) */
				case 'D': /* :D */
				case 'P': /* :P */
				case '>': /* :> */
				case ']': /* :] */
					pUser->fHappiness++;
					break;

				case '(': /* :( */
				case '[': /* :[ */
				case '/': /* :/ */
				case 'I': /* :I */
					pUser->fHappiness--;
					break;
				}
				break;

			case '(':
			case '<':
				switch (lineInfo.pText[linePos + 1]) {
				case ':': /* (: */
				case ';': /* (; */
					pUser->fHappiness++;
					break;

				case '3': /* <3 */
					pUser->nLove++;
					break;
				}
				break;

			case ')':
			case '>':
				switch (lineInfo.pText[linePos + 1]) {
				case ':': /* ): */
				case ';': /* ); */
					pUser->fHappiness--;
					break;
				}
				break;
			}
		}

		if (th_isupper(lineInfo.pText[linePos]))
			pUser->nCaps++;

		switch (lineInfo.pText[linePos]) {
		case '!':
			nYelling++;
			break;

		case '?':
			nQuestions++;
			break;
		}

		pUser->nChars++;
	}

	/* A word that runs up to the end of the text has no trailing space */
	if (isWord)
		nWords++;

	/* Add to user's stats */
	if (nYelling) pUser->nYelling++;
	if (nQuestions) pUser->nQuestions++;

	pUser->nWords += nWords;
	pUser->nPublics++;

	if ((lineInfo.ts.iHours >= 0) && (lineInfo.ts.iHours < SET_HOURS_DAY)) {
		pUser->nWordsPerHour[lineInfo.ts.iHours] += nWords;
		pUser->nPublicsPerHour[lineInfo.ts.iHours]++;

		if (pUser->nWords >=
			(pUser->nWordsPerHour[lineInfo.ts.iHours] /
			(pUser->nPublicsPerHour[lineInfo.ts.iHours] + 1))) {
			textLen = strlen(lineInfo.pText);

			/* Always take the first candidate, later ones replace it
			 * roughly every third time. The modulo keeps the odds
			 * independent of RAND_MAX, which belongs to rand() and
			 * need not match the range of random().
			 */
			if (((!pUser->sComment) || ((random() % 3) == 0)) &&
			    (textLen >= pCfg->commentMinLength) &&
			    (textLen <= pCfg->commentMaxLength)) {
				if (pCfg->stripCtrlChars)
					th_strip_ctrlchars(lineInfo.pText);

				th_strcpy(&pUser->sComment, lineInfo.pText);
			}
		}
	}

	/* Done, ok. */
	return pUser;
}


int fisg_parse_nickchange(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg)
{
 t_lineinfo lineInfo;
 int i;
 
 if (!fmt) return -1;
 
 /* Try to parse the line */
 if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats))
 	return -1;

 /* Let's see if we can autofollow the nick-changes */
 if (pCfg->autoFollowNicks && (lineInfo.pUser != lineInfo.pUser2))
 {
NDMSG("['%s' -> '%s'] -- ", lineInfo.pUser->userHandle, lineInfo.pUser2->userHandle);
 if (lineInfo.pUser->isManaged && !lineInfo.pUser2->isManaged)
	{
NDPRINT("'%s' is alias to '%s'\n", lineInfo.pUser2->userHandle, lineInfo.pUser->userHandle);
	th_strhash_change_pdata(pStats->nickList, lineInfo.pUser2, lineInfo.pUser);
	user_delete(pStats->usersList, lineInfo.pUser2);
	user_free(lineInfo.pUser2);
	lineInfo.pUser->nNickChanges++;
	} else
 if (!lineInfo.pUser->isManaged && lineInfo.pUser2->isManaged)
 	{
NDPRINT("'%s' is alias to '%s'\n", lineInfo.pUser->userHandle, lineInfo.pUser2->userHandle);

	th_strhash_change_pdata(pStats->nickList, lineInfo.pUser, lineInfo.pUser2);
	user_delete(pStats->usersList, lineInfo.pUser);
	user_free(lineInfo.pUser);
	lineInfo.pUser2->nNickChanges++;
 	} else
 if (pCfg->autoHeuristics)
 	{
 	/*
 	 * Let's try to determine the "real" user with simple heuristics
 	 */
NDPRINT("guessing... %i - ", pCfg->autoHeuristics);

	i = 0;
 	if (strlen(lineInfo.pUser->userHandle) < strlen(lineInfo.pUser2->userHandle))
		i--;
		else
		i++;

	if (th_strmatch(lineInfo.pUser2->userHandle, lineInfo.pUser->userHandle))
		i--;
		
	if (th_strmatch(lineInfo.pUser->userHandle, lineInfo.pUser2->userHandle))
		i++;
		
	if (th_strmatch(lineInfo.pUser2->userHandle, "*^*") || th_strmatch(lineInfo.pUser2->userHandle, "*_*"))
		i -= 2;

	if (th_strmatch(lineInfo.pUser->userHandle, "*^*") || th_strmatch(lineInfo.pUser->userHandle, "*_*"))
		i += 2;

	if (i <= 0)
		{
NDPRINT("'%s' is alias to '%s'\n", lineInfo.pUser2->userHandle, lineInfo.pUser->userHandle);

		th_strhash_change_pdata(pStats->nickList, lineInfo.pUser2, lineInfo.pUser);
		user_delete(pStats->usersList, lineInfo.pUser2);
		user_free(lineInfo.pUser2);
		lineInfo.pUser->nNickChanges++;
		lineInfo.pUser->isManaged = TRUE;
		} else {
NDPRINT("'%s' is alias to '%s'\n", lineInfo.pUser->userHandle, lineInfo.pUser2->userHandle);

		th_strhash_change_pdata(pStats->nickList, lineInfo.pUser, lineInfo.pUser2);
		user_delete(pStats->usersList, lineInfo.pUser);
		user_free(lineInfo.pUser);
		lineInfo.pUser2->nNickChanges++;
		lineInfo.pUser2->isManaged = TRUE;
		}
 	} else
NDPRINT("fail.\n");
 } else {
 /* Update the stats */
 lineInfo.pUser->nNickChanges++;
 lineInfo.pUser2->nNickChanges++; 
 }
 
 /* Done, ok. */ 
 return 0;
}


/*
 * Parse a line that only identifies a user (used for joins).
 *
 * Returns the user matched by the %n directive of fmt, or NULL if fmt is
 * NULL, the line does not match, or the format yields no user.
 * pCfg is currently unused.
 */
t_user_entry *fisg_parse_misc(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg)
{
	t_lineinfo lineInfo;

	(void) pCfg;

	if (!fmt) return NULL;

	/* Start from defined values, so that a format without %n yields
	 * NULL instead of an indeterminate pointer.
	 */
	memset(&lineInfo, 0, sizeof(lineInfo));
	lineInfo.pUser = NULL;
	lineInfo.pUser2 = NULL;

	/* Try to parse the line */
	if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats))
		return NULL;

	/* Done, ok. */
	return lineInfo.pUser;
}


/*
 * Parse a kick line and update the statistics of both users involved.
 *
 * The user matched by %n gets nGotKicked increased, the user matched by
 * %N gets nKicks increased.
 *
 * Returns 0 if the line was a kick, -1 if fmt is NULL, the line does not
 * match or one of the two users is missing. pCfg is currently unused.
 */
int fisg_parse_kick(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg)
{
	t_lineinfo lineInfo;

	(void) pCfg;

	if (!fmt) return -1;

	/* Start from defined values; the parser only fills what fmt contains */
	memset(&lineInfo, 0, sizeof(lineInfo));
	lineInfo.pUser = NULL;
	lineInfo.pUser2 = NULL;

	/* Try to parse the line */
	if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats))
		return -1;

	/* Both users are needed */
	if (!lineInfo.pUser || !lineInfo.pUser2)
		return -1;

	/* Add to user's stats */
	lineInfo.pUser->nGotKicked++;
	lineInfo.pUser2->nKicks++;

	/* Done, ok. */
	return 0;
}


/*
 * Parse a topic-change line.
 *
 * On a match the topic text is added to pStats->topicList, with the user
 * who set it attached as node data, and that user's nTopics counter is
 * increased.
 *
 * Returns 0 on success, -1 if fmt is NULL, -2 if the line does not match
 * or yields no user, and -3 if the list node could not be created.
 * pCfg is currently unused.
 */
int fisg_parse_topicchange(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg)
{
	t_lineinfo lineInfo;
	t_str_node *tmpS;

	(void) pCfg;

	if (!fmt) return -1;

	/* Start from defined values; the parser only fills what fmt contains */
	memset(&lineInfo, 0, sizeof(lineInfo));
	lineInfo.pUser = NULL;
	lineInfo.pUser2 = NULL;

	/* Try to parse the line */
	if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats))
		return -2;

	/* The user is dereferenced and stored below */
	if (!lineInfo.pUser)
		return -2;

	/* Add to topic list first, count only what was actually stored */
	tmpS = th_strnode_new(lineInfo.pText, 1, (void *) lineInfo.pUser);
	if (!tmpS)
		return -3;

	th_strlist_insert(&pStats->topicList, tmpS);
	lineInfo.pUser->nTopics++;

	/* Done, ok. */
	return 0;
}


/*
 * A generic logfile parser.
 *
 * Reads inFile line by line, strips the line terminator and tries the
 * formats of logFmt in this order until one matches: public message,
 * notice, action, join, topic change, kick, nick change. Lines that
 * match none of them are ignored.
 *
 * pStats->nLogFiles is increased once per call; pStats->nLines and
 * pStats->nChars are increased for every line read, including empty
 * ones. Lines longer than the internal buffer are processed in pieces.
 *
 * Returns 0 when the end of the file is reached, -1 on NULL arguments.
 */
int fisg_parse_log(FILE *inFile, t_stats *pStats, t_logformat *logFmt, t_fisgconfig *pCfg)
{
	char inLine[SET_MAX_BUF + 1];
	size_t lineLen;
	t_user_entry *tmpUser;

	if (!inFile || !pStats || !logFmt)
		return -1;

	/* Initial stats */
	pStats->nLogFiles++;

	/* Read and parse the data */
	while (fgets(inLine, SET_MAX_BUF, inFile) != NULL) {
		/* Cut the line at the first CR/LF */
		lineLen = 0;
		while (inLine[lineLen] && !th_iscrlf(inLine[lineLen])) lineLen++;
		inLine[lineLen] = 0;

		pStats->nChars += lineLen;
		pStats->nLines++;

		/* Nothing to classify on an empty line */
		if (!inLine[0])
			continue;

		/* Check what type the line is; the first matching format wins */
		if (fisg_parse_public(inLine, logFmt->fmtPublic, pStats, pCfg))
			continue;

		/* Notices are tried exactly once, so that they are counted */
		if ((tmpUser = fisg_parse_public(inLine, logFmt->fmtNotice, pStats, pCfg))) {
			tmpUser->nNotices++;
			continue;
		}

		if ((tmpUser = fisg_parse_public(inLine, logFmt->fmtAction, pStats, pCfg))) {
			tmpUser->nActions++;
			continue;
		}

		if ((tmpUser = fisg_parse_misc(inLine, logFmt->fmtJoin, pStats, pCfg))) {
			tmpUser->nJoins++;
			continue;
		}

		/* These return 0 on a match */
		if (fisg_parse_topicchange(inLine, logFmt->fmtTopicChange, pStats, pCfg) == 0)
			continue;

		if (fisg_parse_kick(inLine, logFmt->fmtKick, pStats, pCfg) == 0)
			continue;

		/* Last candidate; a non-match simply leaves the line unclassified */
		(void) fisg_parse_nickchange(inLine, logFmt->fmtNickChange, pStats, pCfg);
	} /* while */

	return 0;
}



/* syntax highlighted by Code2HTML, v. 0.9.1 */