/*
* FISG - Logfile parser
* Programmed and designed by Matti 'ccr' Hamalainen
* (C) Copyright 2003-2004 Tecnic Software productions (TNSP)
*
* Please read file 'COPYING' for information on license and distribution.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "fisg.h"
#include "th_util.h"
#include "th_config.h"
#include "th_string.h"
#include "in_formats.h"
/*
* Parsing functions
*/
/*
 * Read an unsigned decimal number from inLine starting at offset *linePos.
 *
 * inLine  - NUL-terminated line being parsed
 * iLen    - maximum number of digits to consume
 * linePos - in/out offset into inLine; advanced past every consumed digit
 *
 * Returns the parsed value, or 0 when no digit is found at *linePos.
 */
int fisg_parse_int(char *inLine, size_t iLen, size_t *linePos)
{
    int value = 0;
    size_t digitsLeft = iLen;

    for (;;)
    {
        /* Stop at the first non-digit ... */
        if (!th_isdigit(inLine[*linePos]))
            break;

        /* ... or once the digit budget has been used up */
        if (digitsLeft == 0)
            break;
        digitsLeft--;

        value = (value * 10) + (inLine[*linePos] - '0');
        (*linePos)++;
    }

    return value;
}
/*
 * Find the user record belonging to a nick, creating it if the nick is new.
 *
 * pStats  - statistics context; its nickList and usersList are updated
 * newNick - nick as it appeared on the log line
 *
 * When the nick is already present in pStats->nickList, its use counter is
 * incremented and the attached user record is returned. Otherwise a new
 * user and a new nick node are created, the node is inserted into the nick
 * hash and the user into the user list.
 *
 * Returns the user record, or NULL when the record could not be created or
 * the nick could not be inserted into the hash.
 */
t_user_entry *fisg_parse_user(t_stats *pStats, char *newNick)
{
    t_user_entry *tmpUser;
    t_str_node *tmpNick;

    /* Known nick: just count the use and hand back its user */
    tmpNick = nickhash_search(pStats->nickList, newNick);
    if (tmpNick)
    {
        tmpNick->nUsed++;
        return tmpNick->pData;
    }

    /*
     * Unknown nick: build a new user and a nick node pointing at it.
     * NOTE(review): assumes user_new() and th_strnode_new() signal
     * failure by returning NULL -- confirm against their definitions.
     */
    tmpUser = user_new(newNick);
    if (!tmpUser)
        return NULL;

    tmpNick = th_strnode_new(newNick, 1, tmpUser);
    if (!tmpNick)
    {
        user_free(tmpUser);
        return NULL;
    }

    /* Insert into nicklist */
    if (nickhash_insert(pStats->nickList, tmpNick) != 0)
    {
        /* Failed, due to hash */
        THERR("nickhash_insert() failed, hash: '%s'\n", newNick);
        user_free(tmpUser);
        th_strnode_free(tmpNick);
        return NULL;
    }

    /* Insert into userlist */
    user_insert(pStats->usersList, tmpUser);
    return tmpUser;
}
/*
 * Match one log line against a format string and collect the parsed fields.
 *
 * inLine   - NUL-terminated log line
 * fmt      - format string; NULL makes the function return -1
 * lineInfo - output record
 * pStats   - statistics context used to look up / create users
 *
 * Characters in fmt other than '%' must match the line literally.
 * Supported '%' directives:
 *   %?  any single character (fails at end of line)
 *   %*  skip forward to the character that follows the directive in fmt
 *   %@  one optional '@', '+' or whitespace character (fails at end of line)
 *   %H %M %S %d %j  up to two digits: hours, minutes, seconds, day, month
 *   %Y  up to four digits: year
 *   %y  up to two digits: year; values below 70 map to 20xx, others to 19xx
 *   %n %N  nick, resolved into lineInfo->pUser / lineInfo->pUser2
 *   %m %c  mode / channel: skipped up to the next character of fmt
 *   %t  text: copied into lineInfo->pText up to the next character of fmt
 *       or SET_MAX_BUF characters
 *
 * On entry pUser and pUser2 are reset to NULL and pText to the empty
 * string, so callers never see stale values for directives the format does
 * not contain. Timestamp fields are only written by their directives.
 *
 * Returns 0 when the line matched (and every nick could be resolved to a
 * user), 1 when it did not, and -1 for a NULL or malformed format string.
 */
int fisg_parse_generic(char *inLine, char *fmt, t_lineinfo *lineInfo, t_stats *pStats)
{
    size_t linePos, i;
    BOOL isOK, isEnd, tmpNick1S = FALSE, tmpNick2S = FALSE;
    char tmpStr[SET_MAX_NICKLEN + 1] = "",
         tmpNick1[SET_MAX_NICKLEN + 1],
         tmpNick2[SET_MAX_NICKLEN + 1],
         tmpDest, c;

    if (!fmt) return -1;

    /* Initialize */
    linePos = 0;
    isOK = TRUE;
    lineInfo->pUser = NULL;
    lineInfo->pUser2 = NULL;
    lineInfo->pText[0] = 0;

    /* Parse the line via format-string */
    while (*fmt && isOK)
    {
        if (*fmt == '%')
        {
            switch (*(++fmt)) {
            /* Generic matching */
            case '?':
                /* Match anything */
                fmt++;
                if (inLine[linePos])
                    linePos++;
                else
                    isOK = FALSE;
                break;

            case '*':
                /* Match anything until next char */
                fmt++;
                while (inLine[linePos] && (inLine[linePos] != *fmt)) linePos++;
                break;

            case '@':
                /* Match irssi style optional '@|+| ' */
                fmt++;
                if (!inLine[linePos]) isOK = FALSE;
                if ((inLine[linePos] == '@') ||
                    (inLine[linePos] == '+') ||
                    th_isspace(inLine[linePos]))
                    linePos++;
                break;

            /* Timestamps */
            case 'H': lineInfo->ts.iHours = fisg_parse_int(inLine, 2, &linePos); fmt++; break;
            case 'M': lineInfo->ts.iMinutes = fisg_parse_int(inLine, 2, &linePos); fmt++; break;
            case 'S': lineInfo->ts.iSeconds = fisg_parse_int(inLine, 2, &linePos); fmt++; break;
            case 'Y': lineInfo->ts.iYear = fisg_parse_int(inLine, 4, &linePos); fmt++; break;

            case 'y':
                /* 2-digit year: pivot at 70 */
                i = fisg_parse_int(inLine, 2, &linePos);
                if (i < 70)
                    i += 2000;
                else
                    i += 1900;
                lineInfo->ts.iYear = i;
                fmt++;
                break;

            case 'd': lineInfo->ts.iDay = fisg_parse_int(inLine, 2, &linePos); fmt++; break;
            case 'j': lineInfo->ts.iMonth = fisg_parse_int(inLine, 2, &linePos); fmt++; break;

            /* Special matches */
            case 'n':
            case 'N':
                /* Nick */
                tmpDest = *fmt;
                fmt++;

                /* Find the start of the nick */
                th_findnext(inLine, &linePos);

                /* Get the nick to temp buffer */
                i = 0; isEnd = FALSE;
                c = inLine[linePos];

                /* A nick has to start with a letter or a "special" character */
                if (!th_isalpha(c) && !th_isspecial(c)) isOK = FALSE;

                while (isOK && !isEnd)
                {
                    c = inLine[linePos];
                    if (!c || (c == *fmt) || th_isspace(c) || (i >= SET_MAX_NICKLEN))
                        isEnd = TRUE;
                    else
                    {
                        if (th_isalpha(c) || th_isdigit(c) || th_isspecial(c) || (c == '-'))
                            tmpStr[i++] = inLine[linePos++];
                        else
                            isOK = FALSE;
                    }
                }
                tmpStr[i++] = 0;

                /* Skip whitespace between the nick and the next format char */
                while (inLine[linePos] && th_isspace(inLine[linePos]) && (inLine[linePos] != *fmt)) linePos++;
                if (inLine[linePos] != *fmt) isOK = FALSE;

                /* Remember the nick; users are resolved once the whole line matched */
                if (isOK && (i > 0))
                    switch (tmpDest) {
                    case 'n': tmpNick1S = TRUE; strcpy(tmpNick1, tmpStr); break;
                    case 'N': tmpNick2S = TRUE; strcpy(tmpNick2, tmpStr); break;
                    }
                break;

            case 'm':
                /* Mode */
                fmt++;
                while (inLine[linePos] && (inLine[linePos] != *fmt)) linePos++;
                break;

            case 'c':
                /* Channel */
                fmt++;
                while (inLine[linePos] && (inLine[linePos] != *fmt)) linePos++;
                break;

            case 't':
                /* Text */
                fmt++;
                i = 0;
                while (inLine[linePos] && (inLine[linePos] != *fmt) && (i < SET_MAX_BUF))
                    lineInfo->pText[i++] = inLine[linePos++];
                lineInfo->pText[i++] = 0;
                break;

            /* Error */
            default:
                THERR("Syntax error in format-string '%s'\n", fmt);
                return -1;
            }
        } else {
            /* Check matches */
            if (*fmt != inLine[linePos])
                isOK = FALSE;
            fmt++;
            linePos++;
        }
    } /* while(*fmt) */

    /*
     * Only touch the user database for lines that matched completely.
     * A nick that cannot be turned into a user record makes the whole
     * parse fail, so callers never get "success" with a NULL user.
     */
    if (isOK && tmpNick1S)
    {
        lineInfo->pUser = fisg_parse_user(pStats, tmpNick1);
        if (!lineInfo->pUser)
            isOK = FALSE;
    }

    if (isOK && tmpNick2S)
    {
        lineInfo->pUser2 = fisg_parse_user(pStats, tmpNick2);
        if (!lineInfo->pUser2)
            isOK = FALSE;
    }

    return !isOK;
}
/*
 * Record a URL mentioned by a user.
 *
 * inLine - text starting right after the "http://" prefix
 * pUser  - user who said the line; must not be NULL
 * pStats - statistics context; the URL is added to pStats->urlList
 * pCfg   - configuration (not used by this function)
 *
 * The URL is taken as the longest run of alphanumeric characters and the
 * punctuation listed in urlPunct, limited to SET_MAX_BUF characters. When
 * the URL is terminated by whitespace or by the end of the line, trailing
 * '.' and ',' are treated as sentence punctuation and removed. If nothing
 * usable follows the prefix, the call is a no-op.
 */
void fisg_parse_url(char *inLine, t_user_entry *pUser, t_stats *pStats, t_fisgconfig *pCfg)
{
    static const char urlPunct[] = ".,/-~?&%_=;:^[]()#";
    int urlLen;
    char c, urlStr[SET_MAX_BUF + 1];
    t_str_node *tmpS;

    assert(pUser);

    /* Get the text of the URL */
    urlLen = 0;
    while (urlLen < SET_MAX_BUF)
    {
        c = *inLine;
        if (!c)
            break;
        if (!th_isalnum(c) && (strchr(urlPunct, c) == NULL))
            break;
        urlStr[urlLen++] = c;
        inLine++;
    }
    urlStr[urlLen] = 0;

    /* A bare "http://" is not a URL: do not store or count it */
    if (urlLen == 0)
        return;

    /* Delete non-relevant trailing punctuation (always keeps the first char) */
    if (!*inLine || th_isspace(*inLine))
    {
        while ((urlLen > 1) &&
            ((urlStr[urlLen - 1] == ',') || (urlStr[urlLen - 1] == '.')))
            urlStr[--urlLen] = 0;
    }

    /* Add the URL in list */
    tmpS = th_strhash_search(pStats->urlList, urlStr, FALSE);
    if (tmpS)
    {
        /* Increase number of references */
        tmpS->nUsed++;
    } else {
        /* Add a new string */
        tmpS = th_strnode_new(urlStr, 1, NULL);
        th_strhash_insert(pStats->urlList, tmpS, FALSE);
    }

    /* Increase number of URLs said by user */
    pUser->nURLs++;
}
/*
 * Parse a line of public conversation and update the speaker's statistics.
 *
 * infLine - NUL-terminated log line
 * fmt     - format string for this line type; NULL means "no such format"
 * pStats  - statistics context
 * pCfg    - configuration (comment length limits, control-char stripping)
 *
 * Besides per-user character, word, capital-letter, question and yelling
 * counters, the function detects the first "http://" URL on the line,
 * scores emoticons found at the start of words, and may store the line as
 * the user's sample comment.
 *
 * Returns the speaking user, or NULL when fmt is NULL, the line does not
 * match, or no user was parsed from the line.
 */
t_user_entry *fisg_parse_public(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg)
{
    t_lineinfo lineInfo;
    t_user_entry *pUser;
    t_uint nWords, nQuestions, nYelling;
    char *pText, *tmpStr;
    size_t textLen;
    int linePos;
    BOOL isWord;

    if (!fmt) return NULL;

    /* Start from a known state: the format need not set every field */
    memset(&lineInfo, 0, sizeof(lineInfo));

    /* Try to parse the line */
    if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats))
        return NULL;

    /* Without a user there is nobody to account the line to */
    pUser = lineInfo.pUser;
    if (!pUser)
        return NULL;

    /* If the text is empty, we don't need to analyze it */
    pText = lineInfo.pText;
    if (!pText[0])
        return pUser;

    /* Detect HTTP-URLs */
    tmpStr = strstr(pText, "http://");
    if (tmpStr)
    {
        tmpStr += strlen("http://");
        fisg_parse_url(tmpStr, pUser, pStats, pCfg);
    }

    /* Statisticize the actual public message-line */
    isWord = FALSE;
    nQuestions = nYelling = nWords = 0;

    for (linePos = 0; pText[linePos]; linePos++)
    {
        if (isWord && th_isspace(pText[linePos]))
        {
            /* A word just ended */
            nWords++;
            isWord = FALSE;
        } else
        if ((!isWord) && !th_isspace(pText[linePos]))
        {
            /* A word just started: check whether it opens with an emoticon */
            isWord = TRUE;
            switch (pText[linePos]) {
            case '=':
            case ':':
            case ';':
                switch (pText[linePos + 1]) {
                case ')': /* :) */
                case 'D': /* :D */
                case 'P': /* :P */
                case '>': /* :> */
                case ']': /* :] */
                    pUser->fHappiness++;
                    break;

                case '(': /* :( */
                case '[': /* :[ */
                case '/': /* :/ */
                case 'I': /* :I */
                    pUser->fHappiness--;
                    break;
                }
                break;

            case '(':
            case '<':
                switch (pText[linePos + 1]) {
                case ':': /* (: */
                case ';': /* (; */
                    pUser->fHappiness++;
                    break;

                case '3': /* <3 */
                    pUser->nLove++;
                    break;
                }
                break;

            case ')':
            case '>':
                switch (pText[linePos + 1]) {
                case ':': /* ): */
                case ';': /* ); */
                    pUser->fHappiness--;
                    break;
                }
                break;
            }
        }

        if (th_isupper(pText[linePos]))
            pUser->nCaps++;

        switch (pText[linePos]) {
        case '!':
            nYelling++;
            break;
        case '?':
            nQuestions++;
            break;
        }

        pUser->nChars++;
    }

    /* The last word has no trailing whitespace to terminate it */
    if (isWord)
        nWords++;

    /* Add to user's stats: a line counts once however many '!' or '?' it has */
    if (nYelling) pUser->nYelling++;
    if (nQuestions) pUser->nQuestions++;
    pUser->nWords += nWords;
    pUser->nPublics++;

    if ((lineInfo.ts.iHours >= 0) && (lineInfo.ts.iHours < SET_HOURS_DAY))
    {
        pUser->nWordsPerHour[lineInfo.ts.iHours] += nWords;
        pUser->nPublicsPerHour[lineInfo.ts.iHours]++;

        if (pUser->nWords >=
            (pUser->nWordsPerHour[lineInfo.ts.iHours] /
            (pUser->nPublicsPerHour[lineInfo.ts.iHours]+1)))
        {
            /* Always take a first comment; later replace it about 1 time in 3 */
            if ((!pUser->sComment) || (random() < (RAND_MAX / 3)))
            {
                textLen = strlen(pText);
                if ((textLen >= pCfg->commentMinLength) &&
                    (textLen <= pCfg->commentMaxLength))
                {
                    if (pCfg->stripCtrlChars)
                        th_strip_ctrlchars(pText);

                    th_strcpy(&pUser->sComment, pText);
                }
            }
        }
    }

    /* Done, ok. */
    return pUser;
}
int fisg_parse_nickchange(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg)
{
t_lineinfo lineInfo;
int i;
if (!fmt) return -1;
/* Try to parse the line */
if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats))
return -1;
/* Let's see if we can autofollow the nick-changes */
if (pCfg->autoFollowNicks && (lineInfo.pUser != lineInfo.pUser2))
{
NDMSG("['%s' -> '%s'] -- ", lineInfo.pUser->userHandle, lineInfo.pUser2->userHandle);
if (lineInfo.pUser->isManaged && !lineInfo.pUser2->isManaged)
{
NDPRINT("'%s' is alias to '%s'\n", lineInfo.pUser2->userHandle, lineInfo.pUser->userHandle);
th_strhash_change_pdata(pStats->nickList, lineInfo.pUser2, lineInfo.pUser);
user_delete(pStats->usersList, lineInfo.pUser2);
user_free(lineInfo.pUser2);
lineInfo.pUser->nNickChanges++;
} else
if (!lineInfo.pUser->isManaged && lineInfo.pUser2->isManaged)
{
NDPRINT("'%s' is alias to '%s'\n", lineInfo.pUser->userHandle, lineInfo.pUser2->userHandle);
th_strhash_change_pdata(pStats->nickList, lineInfo.pUser, lineInfo.pUser2);
user_delete(pStats->usersList, lineInfo.pUser);
user_free(lineInfo.pUser);
lineInfo.pUser2->nNickChanges++;
} else
if (pCfg->autoHeuristics)
{
/*
* Let's try to determine the "real" user with simple heuristics
*/
NDPRINT("guessing... %i - ", pCfg->autoHeuristics);
i = 0;
if (strlen(lineInfo.pUser->userHandle) < strlen(lineInfo.pUser2->userHandle))
i--;
else
i++;
if (th_strmatch(lineInfo.pUser2->userHandle, lineInfo.pUser->userHandle))
i--;
if (th_strmatch(lineInfo.pUser->userHandle, lineInfo.pUser2->userHandle))
i++;
if (th_strmatch(lineInfo.pUser2->userHandle, "*^*") || th_strmatch(lineInfo.pUser2->userHandle, "*_*"))
i -= 2;
if (th_strmatch(lineInfo.pUser->userHandle, "*^*") || th_strmatch(lineInfo.pUser->userHandle, "*_*"))
i += 2;
if (i <= 0)
{
NDPRINT("'%s' is alias to '%s'\n", lineInfo.pUser2->userHandle, lineInfo.pUser->userHandle);
th_strhash_change_pdata(pStats->nickList, lineInfo.pUser2, lineInfo.pUser);
user_delete(pStats->usersList, lineInfo.pUser2);
user_free(lineInfo.pUser2);
lineInfo.pUser->nNickChanges++;
lineInfo.pUser->isManaged = TRUE;
} else {
NDPRINT("'%s' is alias to '%s'\n", lineInfo.pUser->userHandle, lineInfo.pUser2->userHandle);
th_strhash_change_pdata(pStats->nickList, lineInfo.pUser, lineInfo.pUser2);
user_delete(pStats->usersList, lineInfo.pUser);
user_free(lineInfo.pUser);
lineInfo.pUser2->nNickChanges++;
lineInfo.pUser2->isManaged = TRUE;
}
} else
NDPRINT("fail.\n");
} else {
/* Update the stats */
lineInfo.pUser->nNickChanges++;
lineInfo.pUser2->nNickChanges++;
}
/* Done, ok. */
return 0;
}
/*
 * Parse a line for which only the acting user matters.
 *
 * infLine - NUL-terminated log line
 * fmt     - format string; NULL means "no such format"
 * pStats  - statistics context
 * pCfg    - configuration (not used by this function)
 *
 * Returns the user parsed from the line, or NULL when fmt is NULL, the
 * line does not match, or the format produced no user.
 */
t_user_entry *fisg_parse_misc(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg)
{
    t_lineinfo lineInfo;

    if (!fmt) return NULL;

    /* Zero first so that a format without a nick yields NULL, not garbage */
    memset(&lineInfo, 0, sizeof(lineInfo));

    /* Try to parse the line */
    if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats))
        return NULL;

    /* Done, ok. */
    return lineInfo.pUser;
}
/*
 * Parse a kick line and update both users' kick counters.
 *
 * infLine - NUL-terminated log line
 * fmt     - kick format string; NULL means "no such format"
 * pStats  - statistics context
 * pCfg    - configuration (not used by this function)
 *
 * The first parsed user (pUser) gets nGotKicked incremented, the second
 * (pUser2) gets nKicks incremented.
 *
 * Returns 0 on success, -1 when fmt is NULL, the line does not match, or
 * the format did not yield both users.
 */
int fisg_parse_kick(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg)
{
    t_lineinfo lineInfo;

    if (!fmt) return -1;

    /* Start from a known state: the format need not set every field */
    memset(&lineInfo, 0, sizeof(lineInfo));

    /* Try to parse the line */
    if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats))
        return -1;

    /* Both parties are needed to account the kick */
    if (!lineInfo.pUser || !lineInfo.pUser2)
        return -1;

    /* Add to user's stats */
    lineInfo.pUser->nGotKicked++;
    lineInfo.pUser2->nKicks++;

    /* Done, ok. */
    return 0;
}
/*
 * Parse a topic-change line, credit the user and remember the topic.
 *
 * infLine - NUL-terminated log line
 * fmt     - topic-change format string; NULL means "no such format"
 * pStats  - statistics context; the topic is added to pStats->topicList
 * pCfg    - configuration (not used by this function)
 *
 * Returns 0 on success, -1 when fmt is NULL, -2 when the line does not
 * match or yields no user, and -3 when the topic node cannot be created.
 */
int fisg_parse_topicchange(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg)
{
    t_lineinfo lineInfo;
    t_str_node *tmpS;

    if (!fmt) return -1;

    /* Start from a known state: the format need not set every field */
    memset(&lineInfo, 0, sizeof(lineInfo));

    /* Try to parse the line */
    if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats))
        return -2;

    /* A topic change without a user cannot be accounted */
    if (!lineInfo.pUser)
        return -2;

    /*
     * Create the list node before touching any counter.
     * NOTE(review): assumes th_strnode_new() returns NULL on failure.
     */
    tmpS = th_strnode_new(lineInfo.pText, 1, (void *) lineInfo.pUser);
    if (!tmpS)
        return -3;

    /* Add to user's stats and topic list */
    lineInfo.pUser->nTopics++;
    th_strlist_insert(&pStats->topicList, tmpS);

    /* Done, ok. */
    return 0;
}
/*
 * A generic logfile parser
 *
 * inFile - open stream to read log lines from
 * pStats - statistics context to update
 * logFmt - set of format strings describing the log dialect
 * pCfg   - configuration passed on to the line parsers
 *
 * Each line is stripped of its line ending, counted in pStats->nChars and
 * pStats->nLines, and then tried against the formats in this order:
 * public, notice, action, join, topic change, kick, nick change. The first
 * format that matches wins. Lines are read in pieces of at most
 * SET_MAX_BUF - 1 characters, and each piece is counted and parsed as a
 * line of its own.
 *
 * Always returns 0.
 */
int fisg_parse_log(FILE *inFile, t_stats *pStats, t_logformat *logFmt, t_fisgconfig *pCfg)
{
    char inLine[SET_MAX_BUF + 1];
    size_t lineLen;
    t_user_entry *tmpUser;

    /* Initial stats */
    pStats->nLogFiles++;

    /* Read and parse the data */
    while (fgets(inLine, SET_MAX_BUF, inFile) != NULL)
    {
        /* Cut the line at the first CR/LF */
        lineLen = 0;
        while (inLine[lineLen] && !th_iscrlf(inLine[lineLen])) lineLen++;
        inLine[lineLen] = 0;

        pStats->nChars += lineLen;
        pStats->nLines++;

        /* Nothing to classify on an empty line */
        if (!inLine[0])
            continue;

        /* Check what type the line is; the first matching format wins */
        if (fisg_parse_public(inLine, logFmt->fmtPublic, pStats, pCfg))
            continue;

        /* Notices and actions are analysed like public lines, plus a counter */
        if ((tmpUser = fisg_parse_public(inLine, logFmt->fmtNotice, pStats, pCfg)) != NULL)
        {
            tmpUser->nNotices++;
            continue;
        }

        if ((tmpUser = fisg_parse_public(inLine, logFmt->fmtAction, pStats, pCfg)) != NULL)
        {
            tmpUser->nActions++;
            continue;
        }

        if ((tmpUser = fisg_parse_misc(inLine, logFmt->fmtJoin, pStats, pCfg)) != NULL)
        {
            tmpUser->nJoins++;
            continue;
        }

        /* The remaining parsers return 0 on a match */
        if (fisg_parse_topicchange(inLine, logFmt->fmtTopicChange, pStats, pCfg) == 0)
            continue;

        if (fisg_parse_kick(inLine, logFmt->fmtKick, pStats, pCfg) == 0)
            continue;

        /* Last candidate; unrecognised lines are silently ignored */
        (void) fisg_parse_nickchange(inLine, logFmt->fmtNickChange, pStats, pCfg);
    } /* while */

    return 0;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */