/*
* Copyright (c) 2003, 2004 Niels Provos <provos@citi.umich.edu>
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <sys/types.h>
#include <sys/param.h>
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <sys/stat.h>
#include <sys/queue.h>
#include <sys/tree.h>
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include <ctype.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dnet.h>
#include <event.h>
#include <evdns.h>
#include "tagging.h"
#include "histogram.h"
#include "keycount.h"
#include "analyze.h"
#include "filter.h"
char *os_report_file = NULL;
char *port_report_file = NULL;
char *spammer_report_file = NULL;
char *country_report_file = NULL;
static int checkpoint_doreplay; /* externally set by honeydstats */
static struct event ev_analyze;
struct kctree oses;
struct kctree ports;
struct kctree spammers;
struct kctree countries;
struct kctree country_cache;
#define ROL64(x, b) (((x) << b) | ((x) >> (64 - b)))
#define ROR64(x, b) (((x) >> b) | ((x) << (64 - b)))
/*
* Thomas Wang's 64-bit hash function from
* www.concentric.net/~Ttwang/tech/inthash.htm
*/
static __inline uint64_t
longhash1(uint64_t key)
{
key += ~(key << 32);
key ^= ROR64(key, 22);
key += ~(key << 13);
key ^= ROR64(key, 8);
key += (key << 3);
key ^= ROR64(key, 15);
key += ~(key << 27);
key ^= ROR64(key, 31);
return key;
}
static __inline uint32_t
port_hash(const struct addr *src, const struct addr *dst)
{
uint32_t a = src->addr_ip;
uint32_t b = dst->addr_ip;
return ((uint32_t)(longhash1(((uint64_t)a << 32) | b)));
}
void
port_key_extract(struct keycount *keycount, void **pkey, size_t *pkeylen)
{
if ((*pkey = calloc(1, sizeof(uint16_t))) == NULL)
err(1, "%s: calloc");
memcpy(*pkey, keycount->key, sizeof(uint16_t));
*pkeylen = sizeof(uint16_t);
}
char *
port_key_print(void *key, size_t keylen)
{
static char sport[7];
snprintf(sport, sizeof(sport), "%d", *(uint16_t *)key);
return (sport);
}
void
spammer_key_extract(struct keycount *keycount, void **pkey, size_t *pkeylen)
{
if ((*pkey = calloc(1, keycount->keylen)) == NULL)
err(1, "%s: calloc");
memcpy(*pkey, keycount->key, keycount->keylen);
*pkeylen = keycount->keylen;
}
char *
spammer_key_print(void *key, size_t keylen)
{
struct addr addr;
addr_pack(&addr, ADDR_TYPE_IP, IP_ADDR_BITS, key, IP_ADDR_LEN);
return (addr_ntoa(&addr));
}
void
country_key_extract(struct keycount *keycount, void **pkey, size_t *pkeylen)
{
if ((*pkey = calloc(1, keycount->keylen)) == NULL)
err(1, "%s: calloc");
memcpy(*pkey, keycount->key, keycount->keylen);
*pkeylen = keycount->keylen;
}
char *
country_key_print(void *key, size_t keylen)
{
return (key);
}
struct aux {
SPLAY_HEAD(auxtree, auxkey) tree;
TAILQ_HEAD(auxq, auxkey) queue;
int limit;
int entries;
};
static int
aux_compare(struct auxkey *a, struct auxkey *b)
{
if (a->value < b->value)
return (-1);
if (a->value > b->value)
return (1);
return (0);
}
SPLAY_PROTOTYPE(auxtree, auxkey, node, aux_compare);
SPLAY_GENERATE(auxtree, auxkey, node, aux_compare);
void *
aux_create(void)
{
struct aux *aux;
if ((aux = calloc(1, sizeof(struct aux))) == NULL)
err(1, "%s: calloc");
SPLAY_INIT(&aux->tree);
TAILQ_INIT(&aux->queue);
aux->limit = 100000; /* Make better at some point */
return (aux);
}
void
aux_free(void *arg)
{
struct aux *aux = arg;
struct auxtree *tree = &aux->tree;
struct auxkey *key;
while ((key = SPLAY_ROOT(tree)) != NULL) {
SPLAY_REMOVE(auxtree, tree, key);
free(key);
}
free(arg);
}
/* Returns one if the key is new */
int
aux_enter(struct aux *aux, uint32_t value)
{
struct auxtree *tree = &aux->tree;
struct auxq *queue = &aux->queue;
struct auxkey tmp, *key;
tmp.value = value;
if ((key = SPLAY_FIND(auxtree, tree, &tmp)) != NULL) {
/* Mark this entry as recently used - LRU fashion */
TAILQ_REMOVE(queue, key, next);
TAILQ_INSERT_HEAD(queue, key, next);
return (0);
}
if (aux->entries >= aux->limit) {
key = TAILQ_LAST(queue, auxq);
/* The old entry has to go, bye bye */
SPLAY_REMOVE(auxtree, tree, key);
TAILQ_REMOVE(queue, key, next);
memset(key, 0, sizeof(struct auxkey));
aux->entries--;
} else {
if ((key = calloc(1, sizeof(struct auxkey))) == NULL)
err(1, "%s: calloc");
}
key->value = tmp.value;
/* Insert the new key */
SPLAY_INSERT(auxtree, tree, key);
TAILQ_INSERT_TAIL(queue, key, next);
aux->entries++;
return (1);
}
void
os_key_extract(struct keycount *keycount, void **pkey, size_t *pkeylen)
{
const char *key = keycount->key;
if ((*pkey = strdup(key)) == NULL)
err(1, "%s: strdup");
*pkeylen = strlen(key) + 1;
}
char *
os_key_print(void *key, size_t keylen)
{
return (key);
}
static int
report_compare(struct report *a, struct report *b)
{
return (key_compare(a->key, a->keylen, b->key, b->keylen));
}
SPLAY_HEAD(reporttree, report);
SPLAY_PROTOTYPE(reporttree, report, node, report_compare);
SPLAY_GENERATE(reporttree, report, node, report_compare);
void
analyze_init(void)
{
struct timeval tv;
evtimer_set(&ev_analyze, analyze_report_cb, &ev_analyze);
timerclear(&tv);
tv.tv_sec = ANALYZE_REPORT_INTERVAL;
evtimer_add(&ev_analyze, &tv);
SPLAY_INIT(&oses);
SPLAY_INIT(&ports);
SPLAY_INIT(&spammers);
SPLAY_INIT(&countries);
SPLAY_INIT(&country_cache);
evdns_init();
}
void
analyze_set_checkpoint_doreplay(int doit)
{
checkpoint_doreplay = doit;
}
void
analyze_record(const struct record *record)
{
if (record->dst_port == 25 && record->bytes != 0)
analyze_spammer_enter(&record->src, record->bytes);
/*
* Records may be duplicated if they carry extra payload hashes.
* In those cases, we need to ignore the connection information.
* We also want to ignore connection information for connections
* that were generated by the honeypots.
*/
if ((record->state & RECORD_STATE_NEW) == 0 ||
(record->flags & REC_FLAG_LOCAL) != 0)
return;
/* Enter OS fingerprint */
if (record->os_fp == NULL)
analyze_os_enter(&record->src, "unknown");
else
analyze_os_enter(&record->src, record->os_fp);
/* Enter Port Analysis */
analyze_port_enter(record->dst_port, &record->src, &record->dst);
/* Entry country analysis */
analyze_country_enter(&record->src, &record->dst);
}
void
analyze_spammer_enter(const struct addr *src, uint32_t bytes)
{
struct keycount tmpkey, *key;
tmpkey.key = &src->addr_ip;
tmpkey.keylen = sizeof(src->addr_ip);
if ((key = SPLAY_FIND(kctree, &spammers, &tmpkey)) == NULL) {
key = keycount_new(&src->addr_ip, sizeof(src->addr_ip),
NULL, NULL);
SPLAY_INSERT(kctree, &spammers, key);
}
count_increment(key->count, bytes);
}
struct country_state {
struct addr src;
struct addr dst;
int result_from_cache;
};
void
analyze_country_enter_cb(int result, char type, int count, int ttl,
void *addresses, void *arg)
{
struct country_state *state = arg;
struct addr *src = &state->src;
struct keycount tmpkey, *key;
char tld[20];
if (result != DNS_ERR_NONE || count != 1 || type != DNS_PTR) {
/* Enter into our negative cache */
if (!state->result_from_cache) {
tmpkey.key = &src->addr_ip;
tmpkey.keylen = sizeof(src->addr_ip);
key = SPLAY_FIND(kctree, &country_cache, &tmpkey);
if (!key) {
key = keycount_new(
&src->addr_ip,
sizeof(src->addr_ip),
NULL, NULL);
SPLAY_INSERT(kctree, &country_cache, key);
}
count_increment(key->count, 1);
}
strlcpy(tld, "unknown", sizeof(tld));
} else {
const char *hostname = *(char **)addresses;
int i;
/* Extract the country key */
for (i = strlen(hostname) - 1; i >= 0; --i) {
if (hostname[i] == '.') {
i += 1;
break;
}
}
strlcpy(tld, hostname + i, sizeof(tld));
for (i = 0; i < strlen(tld); i++) {
if (isdigit(tld[i])) {
strlcpy(tld, "unknown", sizeof(tld));
break;
}
tld[i] = tolower(tld[i]);
}
}
tmpkey.key = tld;
tmpkey.keylen = strlen(tmpkey.key) + 1;
if ((key = SPLAY_FIND(kctree, &countries, &tmpkey)) == NULL) {
key = keycount_new(tld, strlen(tld) + 1, aux_create, aux_free);
SPLAY_INSERT(kctree, &countries, key);
}
/* If the address is new, we are going to resolve it */
if (aux_enter(key->auxilary, port_hash(&state->src, &state->dst)))
count_increment(key->count, 1);
free(state);
}
void
analyze_country_enter(const struct addr *addr, const struct addr *dst)
{
struct keycount tmpkey, *key;
struct country_state *state = calloc(1, sizeof(struct country_state));
if (state == NULL)
err(1, "%s: calloc", __func__);
state->src = *addr;
state->dst = *dst;
/*
* Check if this IP returned a resolver error in the last hour.
*/
tmpkey.key = &addr->addr_ip;
tmpkey.keylen = sizeof(addr->addr_ip);
if ((key = SPLAY_FIND(kctree, &country_cache, &tmpkey)) != NULL) {
if (count_get_minute(key->count) ||
count_get_hour(key->count)) {
state->result_from_cache = 1;
analyze_country_enter_cb(DNS_ERR_NOTEXIST, DNS_PTR,
0, 0, NULL, state);
return;
}
}
if (!checkpoint_doreplay) {
struct in_addr in;
in.s_addr = addr->addr_ip;
evdns_resolve_reverse(&in, 0, analyze_country_enter_cb, state);
} else {
/*
* If we are replaying a checkpoint, we do not want to do
* async calls.
*/
struct hostent *hp = gethostbyaddr(
(const char *)&addr->addr_ip, IP_ADDR_LEN, AF_INET);
if (hp == NULL) {
analyze_country_enter_cb(DNS_ERR_NOTEXIST, DNS_PTR,
0, 0, NULL, state);
} else {
analyze_country_enter_cb(DNS_ERR_NONE, DNS_PTR,
1, 1200 /* ttl */, (void *)&hp->h_name, state);
}
}
}
void
analyze_os_enter(const struct addr *addr, const char *osfp)
{
struct keycount tmpkey, *key;
tmpkey.key = osfp;
tmpkey.keylen = strlen(osfp) + 1;
if ((key = SPLAY_FIND(kctree, &oses, &tmpkey)) == NULL) {
key = keycount_new(osfp, strlen(osfp) + 1,
aux_create, aux_free);
SPLAY_INSERT(kctree, &oses, key);
}
/* If the address is new, we are going to increase the counter */
if (aux_enter(key->auxilary, addr->addr_ip))
count_increment(key->count, 1);
}
void
analyze_port_enter(uint16_t port,
const struct addr *src, const struct addr *dst)
{
struct keycount tmpkey, *key;
tmpkey.key = &port;
tmpkey.keylen = sizeof(port);
if ((key = SPLAY_FIND(kctree, &ports, &tmpkey)) == NULL) {
key = keycount_new(&port, sizeof(port),
aux_create, aux_free);
SPLAY_INSERT(kctree, &ports, key);
}
/* If the address is new, we are going to increase the counter */
if (aux_enter(key->auxilary, port_hash(src, dst)))
count_increment(key->count, 1);
}
void
report_to_file(struct reporttree *tree, char *filename,
char *(*print)(void *, size_t))
{
static char line[1024];
FILE *fout;
/* We do not create report files while we are replaying a checkpoint */
if (checkpoint_doreplay)
return;
/* create a temporary file */
strlcpy(line, filename, sizeof(line));
strlcat(line, ".tmp", sizeof(line));
if ((fout = fopen(line, "w")) != NULL) {
report_print(tree, fout, print);
fclose(fout);
/* This is an atomic operation */
rename(line, filename);
chmod(filename, S_IRWXU | S_IRGRP | S_IROTH);
} else {
warn("%s: fopen('%s')", __func__, line);
}
}
void
report_print(struct reporttree *tree, FILE *out,
char *(*print)(void *, size_t))
{
struct report *report;
/* Now print the information in alphabetical order */
SPLAY_FOREACH(report, reporttree, tree) {
fprintf(out, "%25s: %7d %7d %7d\n",
print(report->key, report->keylen),
report->minute, report->hour, report->day);
}
}
void
report_free(struct reporttree *tree)
{
struct report *report;
/* And then free the whole tree */
while ((report = SPLAY_ROOT(tree)) != NULL) {
SPLAY_REMOVE(reporttree, tree, report);
free(report->key);
free(report);
}
free(tree);
}
struct reporttree *
report_create(struct kctree *kctree,
void (*extract)(struct keycount *, void **, size_t *))
{
struct reporttree *tree;
struct report *report;
struct keycount *kc, *next;
if ((tree = calloc(1, sizeof(struct reporttree))) == NULL)
err(1, "%s: calloc", __func__);
SPLAY_INIT(tree);
for (kc = SPLAY_MIN(kctree, kctree); kc != NULL; kc = next) {
struct report tmp;
uint32_t sum = 0;
next = SPLAY_NEXT(kctree, kctree, kc);
(*extract)(kc, &tmp.key, &tmp.keylen);
if ((report = SPLAY_FIND(reporttree, tree, &tmp)) == NULL) {
report = calloc(1, sizeof(struct report));
if (report == NULL)
err(1, "%s: calloc");
report->key = tmp.key;
report->keylen = tmp.keylen;
SPLAY_INSERT(reporttree, tree, report);
}
/* Now get the data together */
if ((sum = count_get_minute(kc->count)) != 0)
report->minute += sum;
if ((sum += count_get_hour(kc->count)) != 0)
report->hour += sum;
if ((sum += count_get_day(kc->count)) != 0)
report->day += sum;
if (!sum) {
SPLAY_REMOVE(kctree, kctree, kc);
keycount_free(kc);
}
}
return (tree);
}
void
make_report(struct kctree *kctree, char *filename,
void (*extract)(struct keycount *, void **, size_t *),
char *(*print)(void *, size_t))
{
struct reporttree *tree = report_create(kctree, extract);
report_print(tree, stderr, print);
if (filename != NULL)
report_to_file(tree, filename, print);
report_free(tree);
}
struct filterarg {
struct reporttree *src;
struct reporttree *dst;
};
void
analyze_filter_cb(void *reparg, void *treearg)
{
struct report *report = reparg;
struct filterarg *fa = treearg;
if (SPLAY_FIND(reporttree, fa->dst, report) != NULL)
return;
SPLAY_REMOVE(reporttree, fa->src, report);
SPLAY_INSERT(reporttree, fa->dst, report);
}
void
analyze_print_port_report()
{
struct reporttree *tree, *filtered_tree;
struct filtertree *min_filters, *hour_filters, *day_filters;
struct report *report;
struct filterarg fa;
tree = report_create(&ports, port_key_extract);
/* Filter trees for Minutes, Hours and Days */
min_filters = filter_create();
hour_filters = filter_create();
day_filters = filter_create();
SPLAY_FOREACH(report, reporttree, tree) {
filter_insert(min_filters, report->minute, report);
filter_insert(hour_filters, report->hour, report);
filter_insert(day_filters, report->day, report);
}
if ((filtered_tree = calloc(1, sizeof(struct reporttree))) == NULL)
err(1, "%s: calloc");
SPLAY_INIT(filtered_tree);
/*
* Object passed to the call back function to merge the different
* filter trees.
*/
fa.src = tree;
fa.dst = filtered_tree;
filter_top(min_filters, 5, analyze_filter_cb, &fa);
filter_top(hour_filters,10, analyze_filter_cb, &fa);
filter_top(day_filters, 15, analyze_filter_cb, &fa);
filter_free(min_filters);
filter_free(hour_filters);
filter_free(day_filters);
report_free(tree);
fprintf(stderr, "Destination Port Statistics\n");
report_print(filtered_tree, stderr, port_key_print);
if (port_report_file != NULL)
report_to_file(filtered_tree, port_report_file,
port_key_print);
report_free(filtered_tree);
}
void
analyze_print_spammer_report()
{
struct reporttree *tree, *filtered_tree;
struct filtertree *min_filters, *hour_filters, *day_filters;
struct report *report;
struct filterarg fa;
tree = report_create(&spammers, spammer_key_extract);
/* Filter trees for Minutes, Hours and Days */
min_filters = filter_create();
hour_filters = filter_create();
day_filters = filter_create();
SPLAY_FOREACH(report, reporttree, tree) {
filter_insert(min_filters, report->minute, report);
filter_insert(hour_filters, report->hour, report);
filter_insert(day_filters, report->day, report);
}
if ((filtered_tree = calloc(1, sizeof(struct reporttree))) == NULL)
err(1, "%s: calloc");
SPLAY_INIT(filtered_tree);
/*
* Object passed to the call back function to merge the different
* filter trees.
*/
fa.src = tree;
fa.dst = filtered_tree;
filter_top(min_filters, 5, analyze_filter_cb, &fa);
filter_top(hour_filters,10, analyze_filter_cb, &fa);
filter_top(day_filters, 20, analyze_filter_cb, &fa);
filter_free(min_filters);
filter_free(hour_filters);
filter_free(day_filters);
report_free(tree);
fprintf(stderr, "Spammer Address Statistics\n");
report_print(filtered_tree, stderr, spammer_key_print);
if (spammer_report_file != NULL)
report_to_file(filtered_tree, spammer_report_file,
spammer_key_print);
report_free(filtered_tree);
}
void
analyze_print_country_report()
{
struct reporttree *tree, *filtered_tree;
struct filtertree *min_filters, *hour_filters, *day_filters;
struct report *report;
struct filterarg fa;
tree = report_create(&countries, country_key_extract);
/* Filter trees for Minutes, Hours and Days */
min_filters = filter_create();
hour_filters = filter_create();
day_filters = filter_create();
SPLAY_FOREACH(report, reporttree, tree) {
filter_insert(min_filters, report->minute, report);
filter_insert(hour_filters, report->hour, report);
filter_insert(day_filters, report->day, report);
}
if ((filtered_tree = calloc(1, sizeof(struct reporttree))) == NULL)
err(1, "%s: calloc");
SPLAY_INIT(filtered_tree);
/*
* Object passed to the call back function to merge the different
* filter trees.
*/
fa.src = tree;
fa.dst = filtered_tree;
filter_top(min_filters, 5, analyze_filter_cb, &fa);
filter_top(hour_filters,10, analyze_filter_cb, &fa);
filter_top(day_filters, 20, analyze_filter_cb, &fa);
filter_free(min_filters);
filter_free(hour_filters);
filter_free(day_filters);
report_free(tree);
fprintf(stderr, "Country Activity Statistics\n");
report_print(filtered_tree, stderr, country_key_print);
if (country_report_file != NULL)
report_to_file(filtered_tree, country_report_file,
country_key_print);
report_free(filtered_tree);
}
void
analyze_print_report()
{
fprintf(stderr, "Operating System Statistics\n");
make_report(&oses, os_report_file, os_key_extract, os_key_print);
analyze_print_port_report();
analyze_print_spammer_report();
analyze_print_country_report();
}
void
analyze_report_cb(int fd, short what, void *arg)
{
struct event *ev = arg;
struct timeval tv;
timerclear(&tv);
tv.tv_sec = ANALYZE_REPORT_INTERVAL;
evtimer_add(ev, &tv);
analyze_print_report();
}
#define OS_NUM_OSES 12
void
os_test()
{
char *fingerprints[OS_NUM_OSES] = {
"Linux",
"Windows",
"NetBSD",
"OpenBSD",
"Windows",
"Windows",
"Linux",
"Windows",
"Linux",
"OpenBSD",
"FreeBSD",
"unknown"
};
struct addr src;
struct timeval tv;
rand_t *rand = rand_open();
int i, j;
gettimeofday(&tv, NULL);
count_set_time(&tv);
addr_pton("127.0.0.1", &src);
for (i = 0; i < 24 * 60 * 2 + 60 * 4; i++) {
for (j = 0; i < 24 * 60 * 2 &&
j < rand_uint8(rand) % 50000 + 5000; j++) {
src.addr_ip = rand_uint32(rand);
analyze_os_enter(&src,
fingerprints[rand_uint8(rand) % OS_NUM_OSES]);
}
tv.tv_sec += 30;
if (i % 120 == 0) {
fprintf(stderr, "%ld:\n", tv.tv_sec);
make_report(&oses, NULL, os_key_extract, os_key_print);
}
}
if (SPLAY_ROOT(&oses) != NULL)
errx(1, "oses fingerprints should have been purged");
count_set_time(NULL);
rand_close(rand);
fprintf(stderr, "\t%s: OK\n", __func__);
}
void
analyze_test(void)
{
os_test();
}
syntax highlighted by Code2HTML, v. 0.9.1