/*
* probability.c - Client identifier probability engine
* Copyright (C) 2005 Trevor Talbot and
* the DALnet coding team
*
* See file AUTHORS in IRC package for additional names of
* the programmers.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 1, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: probability.c,v 1.4 2006/01/07 22:13:26 trystanscott Exp $ */
#include "struct.h"
#include "common.h"
#include "sys.h"
#include "h.h"
#include "memcount.h"
#ifdef RWHO_PROBABILITY
/*
 * Minimum and maximum character values to calculate probability for.
 * The range is inclusive; pload() ignores bytes outside it.
 */
#define PMINCHAR 0x20
#define PMAXCHAR 0x7e
/* number of distinct character values in the range; table dimension */
#define BASESIZE (PMAXCHAR+1-PMINCHAR)
/*
 * Bit flags naming the three character sets.  They are stored in
 * pcharset[], ploadedsets and pfoldedsets, and passed as the "type"
 * argument of the averaging functions.
 */
#define PCS_NICK 0x1
#define PCS_USER 0x2
#define PCS_GCOS 0x4
/*
 * Number of times each character pair currently appears on the network.
 * Indexed as [first - PMINCHAR][second - PMINCHAR].
 */
static int probabilities[BASESIZE][BASESIZE];
/*
 * Map of characters to scale for: one entry per byte value holding the
 * PCS_* bits of every set that byte belongs to.
 */
static char pcharset[256];
/*
 * nick/user/gcos count->percent scale factors.  set_probabilities() sets
 * each to 100.0 / (largest per-client average seen); get_probabilities()
 * multiplies by them.
 */
static double nscale;
static double uscale;
static double gscale;
/*
 * nick/user/gcos unscaled averages.  get_probabilities() substitutes
 * these when a string yields no scorable pair.
 */
static int navg;
static int uavg;
static int gavg;
/*
 * State flags (PCS_* bits): ploadedsets records which sets have been
 * given to probability_loadsets(); pfoldedsets records which sets were
 * declared with a lowercase letter and are therefore case-folded.
 */
static char ploadedsets;
static char pfoldedsets;
/*
 * Averaging functions, one per set.  probability_init() points them at
 * pavg_skip; an 'a' modifier in a charset spec selects pavg_adjacent.
 */
static int (*navgfunc)(unsigned char *, int);
static int (*uavgfunc)(unsigned char *, int);
static int (*gavgfunc)(unsigned char *, int);
#if 0 /* currently unused */
/*
 * Adjust the pair table by inc for every strictly adjacent pair of
 * characters in s where both characters lie in PMINCHAR..PMAXCHAR.
 *
 * s    NUL-terminated string to scan
 * inc  amount added to each matching pair's counter (negative to remove)
 */
static void pload_adjacent(char *s, int inc)
{
    unsigned char c1, c2;

    for (;;)
    {
        c1 = *s++;
        /* test c1 before touching *s: s may already be past the NUL */
        if (!c1)
            break;
        c2 = *s;
        if (!c2)
            break;

        if (c2 < PMINCHAR || c2 > PMAXCHAR)
        {
            /* c2 can be neither half of a pair; step over it entirely */
            s++;
            continue;
        }
        if (c1 < PMINCHAR || c1 > PMAXCHAR)
            continue;

        probabilities[c1 - PMINCHAR][c2 - PMINCHAR] += inc;
    }
}
#endif
/*
 * Adjust the pair table by inc for the character pairs found in s.
 *
 * Bytes below PMINCHAR that sit between two characters are skipped, so
 * the characters on either side still form a pair.  A byte above PMAXCHAR
 * is not skipped: it discards the pending first character and scanning
 * resumes after it.
 *
 * s    NUL-terminated string to scan
 * inc  amount added to each matching pair's counter (negative to remove)
 */
static void pload(char *s, int inc)
{
    unsigned char first, second;

    for (;;)
    {
        first = *s;
        if (!first)
            return;
        s++;

        if (first < PMINCHAR || first > PMAXCHAR)
            continue;

        /* step over low-range bytes only; s is left on the candidate */
        while ((second = *s) != 0 && second < PMINCHAR)
            s++;
        if (!second)
            return;

        if (second > PMAXCHAR)
        {
            s++;
            continue;
        }

        probabilities[first - PMINCHAR][second - PMINCHAR] += inc;
    }
}
/*
 * Average the pair counts of the strictly adjacent character pairs in s.
 *
 * A pair is scored only when BOTH characters belong to the set selected
 * by type (per pcharset[]).  When the set is case-folded (pfoldedsets),
 * the counts of every upper/lower case combination of the pair are added
 * together, while the pair itself is still counted once.
 *
 * s     NUL-terminated string to score
 * type  one of PCS_NICK, PCS_USER, PCS_GCOS
 *
 * Returns total/count (integer division), or -1 if no pair was scored.
 */
static int pavg_adjacent(unsigned char *s, int type)
{
    unsigned char c1, c2, lc1, lc2;
    int folded = pfoldedsets & type;
    int count = 0;
    int total = 0;

    for (;;)
    {
        c1 = *s++;
        /* test c1 before touching *s: s may already be past the NUL */
        if (!c1)
            break;
        c2 = *s;
        if (!c2)
            break;

        if (!(pcharset[c2] & type))
        {
            /* c2 can be neither half of a pair; step over it entirely */
            s++;
            continue;
        }
        /*
         * The first character must be in the set as well.  Without this
         * test a byte outside PMINCHAR..PMAXCHAR would wrap when
         * PMINCHAR is subtracted and index past the end of the table.
         */
        if (!(pcharset[c1] & type))
            continue;

        count++;

        if (folded)
        {
            lc1 = ToLower(c1) - PMINCHAR;
            lc2 = ToLower(c2) - PMINCHAR;
            c1 = ToUpper(c1) - PMINCHAR;
            c2 = ToUpper(c2) - PMINCHAR;

            total += probabilities[c1][c2];
            /* add the other case variants, but only where they differ */
            if (lc1 != c1)
            {
                total += probabilities[lc1][c2];
                if (lc2 != c2)
                    total += probabilities[lc1][lc2];
            }
            if (lc2 != c2)
                total += probabilities[c1][lc2];
        }
        else
        {
            c1 -= PMINCHAR;
            c2 -= PMINCHAR;
            total += probabilities[c1][c2];
        }
    }

    if (!count)
        return -1;

    return (total/count);
}
/*
 * Average the pair counts of the character pairs in s, skipping over
 * characters that are not in the set selected by type.  Two set members
 * separated only by non-members therefore still form a pair.
 *
 * When the set is case-folded (pfoldedsets), the counts of every
 * upper/lower case combination of the pair are added together, while the
 * pair itself is still counted once.
 *
 * s     NUL-terminated string to score
 * type  one of PCS_NICK, PCS_USER, PCS_GCOS
 *
 * Returns the summed counts divided by the number of pairs (integer
 * division), or -1 if no pair was found.
 */
static int pavg_skip(unsigned char *s, int type)
{
    const int folded = pfoldedsets & type;
    unsigned char first, second;
    unsigned char up1, up2, lo1, lo2;
    int pairs = 0;
    int sum = 0;

    while (*s)
    {
        first = *s++;
        if (!(pcharset[first] & type))
            continue;

        /* find the next set member; s is left pointing at it */
        while ((second = *s) != 0 && !(pcharset[second] & type))
            s++;
        if (!second)
            break;

        pairs++;

        if (!folded)
        {
            sum += probabilities[first - PMINCHAR][second - PMINCHAR];
            continue;
        }

        lo1 = ToLower(first) - PMINCHAR;
        lo2 = ToLower(second) - PMINCHAR;
        up1 = ToUpper(first) - PMINCHAR;
        up2 = ToUpper(second) - PMINCHAR;

        sum += probabilities[up1][up2];
        /* add the other case variants, but only where they differ */
        if (lo1 != up1)
        {
            sum += probabilities[lo1][up2];
            if (lo2 != up2)
                sum += probabilities[lo1][lo2];
        }
        if (lo2 != up2)
            sum += probabilities[up1][lo2];
    }

    return pairs ? (sum / pairs) : -1;
}
/*
 * Fold one per-client average into a running count, total and maximum.
 * Negative samples (no scorable pair) are ignored.
 */
static void ptally(int sample, int *count, unsigned int *total, int *max)
{
    if (sample < 0)
        return;

    (*count)++;
    *total += sample;
    if (sample > *max)
        *max = sample;
}

/*
 * Walk every person on the client list, score their nick, username and
 * gcos with the configured averaging functions, and derive from those the
 * fallback averages (navg/uavg/gavg) and the count->percent scale factors
 * (nscale/uscale/gscale).  A set whose total is zero keeps its previous
 * average and scale.
 */
static void set_probabilities(void)
{
    int nicks = 0, users = 0, gcoses = 0;
    unsigned int nicksum = 0, usersum = 0, gcossum = 0;
    int nickmax = 0, usermax = 0, gcosmax = 0;
    aClient *cptr = client;

    while (cptr)
    {
        if (IsPerson(cptr))
        {
            ptally(navgfunc((unsigned char *) cptr->name, PCS_NICK),
                   &nicks, &nicksum, &nickmax);
            ptally(uavgfunc((unsigned char *) cptr->user->username, PCS_USER),
                   &users, &usersum, &usermax);
            ptally(gavgfunc((unsigned char *) cptr->info, PCS_GCOS),
                   &gcoses, &gcossum, &gcosmax);
        }
        cptr = cptr->next;
    }

    /* a non-zero total implies a non-zero count and a non-zero maximum */
    if (nicksum)
    {
        navg = nicksum / nicks;
        nscale = 100.0 / nickmax;
    }
    if (usersum)
    {
        uavg = usersum / users;
        uscale = 100.0 / usermax;
    }
    if (gcossum)
    {
        gavg = gcossum / gcoses;
        gscale = 100.0 / gcosmax;
    }
}
/* Count the character pairs of a client's nick, username and gcos. */
void probability_add(aClient *ac)
{
    char *nick = ac->name;
    char *user = ac->user->username;
    char *gcos = ac->info;

    pload(nick, 1);
    pload(user, 1);
    pload(gcos, 1);
}
/* Uncount the character pairs of a client's nick, username and gcos. */
void probability_remove(aClient *ac)
{
    char *nick = ac->name;
    char *user = ac->user->username;
    char *gcos = ac->info;

    pload(nick, -1);
    pload(user, -1);
    pload(gcos, -1);
}
/*
 * Replace one string's contribution to the pair table with another's:
 * the pairs of old are uncounted and the pairs of new are counted.
 */
void probability_change(char *old, char *new)
{
    pload(old, -1);     /* retire the previous value */
    pload(new, +1);     /* account for its replacement */
}
/*
 * Initialize tables.  Call before setting custom charsets.
 *
 * Clears the character-set map and the loaded/folded flags, resets the
 * fallback averages to 50 and the scale factors to 1.0, and selects
 * pavg_skip as the averaging function for all three sets.  The pair
 * count table itself is not touched.
 */
void probability_init(void)
{
    memset(pcharset, 0, sizeof(pcharset));
    ploadedsets = 0;
    pfoldedsets = 0;

    navgfunc = uavgfunc = gavgfunc = pavg_skip;

    navg = uavg = gavg = 50;

    nscale = uscale = gscale = 1.0;
}
/*
 * Parse a custom charset.
 *
 * text is a sequence of set definitions, each of the form
 *
 *     <letter>[a]<value>[-<value>][,<value>[-<value>]]...
 *
 * <letter> selects the set: n/N nick, u/U username, g/G gcos.  A
 * lowercase letter additionally marks the set as case-folded.  An 'a'
 * directly after the letter switches that set to pavg_adjacent.  Each
 * <value> is a character code parsed by strtol() with base 0 and must lie
 * in PMINCHAR..PMAXCHAR; the second value of a range must not be smaller
 * than the first.
 *
 * Returns 1 on success and 0 on a parse error.  Changes made before the
 * error was detected are not rolled back.
 */
int probability_loadsets(char *text)
{
    char *s, *end;
    /*
     * Keep strtol()'s full long result for the range checks: narrowing
     * to int first would let a huge value wrap into the valid range.
     */
    long val, val2;
    int set, i;

    s = text;

    while (*s)
    {
        switch (*s)
        {
            case 'n':
                pfoldedsets |= PCS_NICK;
                /* fallthrough */
            case 'N':
                set = PCS_NICK;
                if (s[1] == 'a')
                {
                    navgfunc = pavg_adjacent;
                    s++;
                }
                break;

            case 'u':
                pfoldedsets |= PCS_USER;
                /* fallthrough */
            case 'U':
                set = PCS_USER;
                if (s[1] == 'a')
                {
                    uavgfunc = pavg_adjacent;
                    s++;
                }
                break;

            case 'g':
                pfoldedsets |= PCS_GCOS;
                /* fallthrough */
            case 'G':
                set = PCS_GCOS;
                if (s[1] == 'a')
                {
                    gavgfunc = pavg_adjacent;
                    s++;
                }
                break;

            default:
                return 0;
        }

        ploadedsets |= set;

        while (*s)
        {
            /* step over the set letter, the 'a' modifier or the ',' */
            s++;

            /* parse first value */
            val = strtol(s, &end, 0);
            if (end == s)
                return 0;
            if (val < PMINCHAR || val > PMAXCHAR)
                return 0;
            pcharset[val] |= set;
            s = end;

            /* if it's a range, parse second value */
            if (*s == '-')
            {
                s++;
                val2 = strtol(s, &end, 0);
                if (end == s)
                    return 0;
                if (val2 < PMINCHAR || val2 > PMAXCHAR)
                    return 0;
                if (val2 < val)
                    return 0;
                s = end;

                /* both bounds are validated, so they fit in an int */
                for (i = (int) val + 1; i <= (int) val2; i++)
                    pcharset[i] |= set;
            }

            /* if there are no more listed values, break out to next set */
            if (*s != ',')
                break;
        }
    }

    return 1;
}
/*
 * Finalize tables.  Call after setting custom charsets (if any).
 *
 * Any set that was not given a custom charset receives its built-in
 * default, then the averages and scale factors are calculated.
 */
void probability_fini(void)
{
    /* default charset spec for each set, applied only if none was loaded */
    static const struct
    {
        int   flag;
        char *spec;
    } defaults[] =
    {
        { PCS_NICK, "n48-57,65-90,97-122" },
        { PCS_USER, "u48-57,65-90,97-122" },
        { PCS_GCOS, "g65-90,97-122" }
    };
    unsigned int i;

    for (i = 0; i < sizeof(defaults) / sizeof(defaults[0]); i++)
    {
        if (ploadedsets & defaults[i].flag)
            continue;
        probability_loadsets(defaults[i].spec);
    }

    /* calculate scales and averages */
    set_probabilities();
}
/*
 * Get nick/user/gcos probabilities for client.
 *
 * Each string is scored with its set's averaging function.  A negative
 * score (no scorable pair) is replaced by that set's stored average.  The
 * result is multiplied by the set's scale factor and stored through
 * np/up/gp, converted to int.
 */
void get_probabilities(aClient *ac, int *np, int *up, int *gp)
{
    int raw;

    raw = navgfunc((unsigned char *) ac->name, PCS_NICK);
    if (raw < 0)
        raw = navg;
    *np = raw * nscale;

    raw = uavgfunc((unsigned char *) ac->user->username, PCS_USER);
    if (raw < 0)
        raw = uavg;
    *up = raw * uscale;

    raw = gavgfunc((unsigned char *) ac->info, PCS_GCOS);
    if (raw < 0)
        raw = gavg;
    *gp = raw * gscale;
}
/*
 * Report this file's memory use into mc: records the file name, sets
 * s_prob.c to 1 and adds the size of the pair table to s_prob.m.
 * Always returns 0.
 * NOTE(review): presumably .c is an object count and .m a byte total,
 * and the return value is the amount of dynamically allocated memory --
 * confirm against memcount.h.
 */
u_long memcount_probability(MCprobability *mc)
{
    mc->s_prob.m += sizeof(probabilities);
    mc->s_prob.c = 1;
    mc->file = __FILE__;

    return 0;
}
#endif /* RWHO_PROBABILITY */
/* syntax highlighted by Code2HTML, v. 0.9.1 */