/* * Copyright (c) 2003, 2004 Niels Provos * All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #ifdef HAVE_SYS_TIME_H #include #endif #include #include #include #include #include #include #include #include #include "tagging.h" #include "histogram.h" #include "keycount.h" #include "analyze.h" #include "filter.h" char *os_report_file = NULL; char *port_report_file = NULL; char *spammer_report_file = NULL; char *country_report_file = NULL; static int checkpoint_doreplay; /* externally set by honeydstats */ static struct event ev_analyze; struct kctree oses; struct kctree ports; struct kctree spammers; struct kctree countries; struct kctree country_cache; #define ROL64(x, b) (((x) << b) | ((x) >> (64 - b))) #define ROR64(x, b) (((x) >> b) | ((x) << (64 - b))) /* * Thomas Wang's 64-bit hash function from * www.concentric.net/~Ttwang/tech/inthash.htm */ static __inline uint64_t longhash1(uint64_t key) { key += ~(key << 32); key ^= ROR64(key, 22); key += ~(key << 13); key ^= ROR64(key, 8); key += (key << 3); key ^= ROR64(key, 15); key += ~(key << 27); key ^= ROR64(key, 31); return key; } static __inline uint32_t port_hash(const struct addr *src, const struct addr *dst) { uint32_t a = src->addr_ip; uint32_t b = dst->addr_ip; return ((uint32_t)(longhash1(((uint64_t)a << 32) | b))); } void port_key_extract(struct keycount *keycount, void **pkey, size_t *pkeylen) { if ((*pkey = calloc(1, sizeof(uint16_t))) == NULL) err(1, "%s: calloc"); memcpy(*pkey, keycount->key, sizeof(uint16_t)); *pkeylen = sizeof(uint16_t); } char * port_key_print(void *key, size_t keylen) { static char sport[7]; snprintf(sport, sizeof(sport), "%d", *(uint16_t *)key); return (sport); } void spammer_key_extract(struct keycount *keycount, void **pkey, size_t *pkeylen) { if ((*pkey = calloc(1, keycount->keylen)) == NULL) err(1, "%s: calloc"); memcpy(*pkey, keycount->key, keycount->keylen); *pkeylen = keycount->keylen; } char * spammer_key_print(void *key, size_t keylen) { struct addr addr; addr_pack(&addr, ADDR_TYPE_IP, IP_ADDR_BITS, key, IP_ADDR_LEN); return (addr_ntoa(&addr)); } void country_key_extract(struct keycount *keycount, void **pkey, size_t *pkeylen) { if ((*pkey = calloc(1, keycount->keylen)) == NULL) err(1, "%s: calloc"); memcpy(*pkey, keycount->key, keycount->keylen); *pkeylen = keycount->keylen; } char * country_key_print(void *key, size_t keylen) { return (key); } struct aux { SPLAY_HEAD(auxtree, auxkey) tree; TAILQ_HEAD(auxq, auxkey) queue; int limit; int entries; }; static int aux_compare(struct auxkey *a, struct auxkey *b) { if (a->value < b->value) return (-1); if (a->value > b->value) return (1); return (0); } SPLAY_PROTOTYPE(auxtree, auxkey, node, aux_compare); SPLAY_GENERATE(auxtree, auxkey, node, aux_compare); void * aux_create(void) { struct aux *aux; if ((aux = calloc(1, sizeof(struct aux))) == NULL) err(1, "%s: calloc"); SPLAY_INIT(&aux->tree); TAILQ_INIT(&aux->queue); aux->limit = 100000; /* Make better at some point */ return (aux); } void aux_free(void *arg) { struct aux *aux = arg; struct auxtree *tree = &aux->tree; struct auxkey *key; while ((key = SPLAY_ROOT(tree)) != NULL) { SPLAY_REMOVE(auxtree, tree, key); free(key); } free(arg); } /* Returns one if the key is new */ int aux_enter(struct aux *aux, uint32_t value) { struct auxtree *tree = &aux->tree; struct auxq *queue = &aux->queue; struct auxkey tmp, *key; tmp.value = value; if ((key = SPLAY_FIND(auxtree, tree, &tmp)) != NULL) { /* Mark this entry as recently used - LRU fashion */ TAILQ_REMOVE(queue, key, next); TAILQ_INSERT_HEAD(queue, key, next); return (0); } if (aux->entries >= aux->limit) { key = TAILQ_LAST(queue, auxq); /* The old entry has to go, bye bye */ SPLAY_REMOVE(auxtree, tree, key); TAILQ_REMOVE(queue, key, next); memset(key, 0, sizeof(struct auxkey)); aux->entries--; } else { if ((key = calloc(1, sizeof(struct auxkey))) == NULL) err(1, "%s: calloc"); } key->value = tmp.value; /* Insert the new key */ SPLAY_INSERT(auxtree, tree, key); TAILQ_INSERT_TAIL(queue, key, next); aux->entries++; return (1); } void os_key_extract(struct keycount *keycount, void **pkey, size_t *pkeylen) { const char *key = keycount->key; if ((*pkey = strdup(key)) == NULL) err(1, "%s: strdup"); *pkeylen = strlen(key) + 1; } char * os_key_print(void *key, size_t keylen) { return (key); } static int report_compare(struct report *a, struct report *b) { return (key_compare(a->key, a->keylen, b->key, b->keylen)); } SPLAY_HEAD(reporttree, report); SPLAY_PROTOTYPE(reporttree, report, node, report_compare); SPLAY_GENERATE(reporttree, report, node, report_compare); void analyze_init(void) { struct timeval tv; evtimer_set(&ev_analyze, analyze_report_cb, &ev_analyze); timerclear(&tv); tv.tv_sec = ANALYZE_REPORT_INTERVAL; evtimer_add(&ev_analyze, &tv); SPLAY_INIT(&oses); SPLAY_INIT(&ports); SPLAY_INIT(&spammers); SPLAY_INIT(&countries); SPLAY_INIT(&country_cache); evdns_init(); } void analyze_set_checkpoint_doreplay(int doit) { checkpoint_doreplay = doit; } void analyze_record(const struct record *record) { if (record->dst_port == 25 && record->bytes != 0) analyze_spammer_enter(&record->src, record->bytes); /* * Records may be duplicated if they carry extra payload hashes. * In those cases, we need to ignore the connection information. * We also want to ignore connection information for connections * that were generated by the honeypots. */ if ((record->state & RECORD_STATE_NEW) == 0 || (record->flags & REC_FLAG_LOCAL) != 0) return; /* Enter OS fingerprint */ if (record->os_fp == NULL) analyze_os_enter(&record->src, "unknown"); else analyze_os_enter(&record->src, record->os_fp); /* Enter Port Analysis */ analyze_port_enter(record->dst_port, &record->src, &record->dst); /* Entry country analysis */ analyze_country_enter(&record->src, &record->dst); } void analyze_spammer_enter(const struct addr *src, uint32_t bytes) { struct keycount tmpkey, *key; tmpkey.key = &src->addr_ip; tmpkey.keylen = sizeof(src->addr_ip); if ((key = SPLAY_FIND(kctree, &spammers, &tmpkey)) == NULL) { key = keycount_new(&src->addr_ip, sizeof(src->addr_ip), NULL, NULL); SPLAY_INSERT(kctree, &spammers, key); } count_increment(key->count, bytes); } struct country_state { struct addr src; struct addr dst; int result_from_cache; }; void analyze_country_enter_cb(int result, char type, int count, int ttl, void *addresses, void *arg) { struct country_state *state = arg; struct addr *src = &state->src; struct keycount tmpkey, *key; char tld[20]; if (result != DNS_ERR_NONE || count != 1 || type != DNS_PTR) { /* Enter into our negative cache */ if (!state->result_from_cache) { tmpkey.key = &src->addr_ip; tmpkey.keylen = sizeof(src->addr_ip); key = SPLAY_FIND(kctree, &country_cache, &tmpkey); if (!key) { key = keycount_new( &src->addr_ip, sizeof(src->addr_ip), NULL, NULL); SPLAY_INSERT(kctree, &country_cache, key); } count_increment(key->count, 1); } strlcpy(tld, "unknown", sizeof(tld)); } else { const char *hostname = *(char **)addresses; int i; /* Extract the country key */ for (i = strlen(hostname) - 1; i >= 0; --i) { if (hostname[i] == '.') { i += 1; break; } } strlcpy(tld, hostname + i, sizeof(tld)); for (i = 0; i < strlen(tld); i++) { if (isdigit(tld[i])) { strlcpy(tld, "unknown", sizeof(tld)); break; } tld[i] = tolower(tld[i]); } } tmpkey.key = tld; tmpkey.keylen = strlen(tmpkey.key) + 1; if ((key = SPLAY_FIND(kctree, &countries, &tmpkey)) == NULL) { key = keycount_new(tld, strlen(tld) + 1, aux_create, aux_free); SPLAY_INSERT(kctree, &countries, key); } /* If the address is new, we are going to resolve it */ if (aux_enter(key->auxilary, port_hash(&state->src, &state->dst))) count_increment(key->count, 1); free(state); } void analyze_country_enter(const struct addr *addr, const struct addr *dst) { struct keycount tmpkey, *key; struct country_state *state = calloc(1, sizeof(struct country_state)); if (state == NULL) err(1, "%s: calloc", __func__); state->src = *addr; state->dst = *dst; /* * Check if this IP returned a resolver error in the last hour. */ tmpkey.key = &addr->addr_ip; tmpkey.keylen = sizeof(addr->addr_ip); if ((key = SPLAY_FIND(kctree, &country_cache, &tmpkey)) != NULL) { if (count_get_minute(key->count) || count_get_hour(key->count)) { state->result_from_cache = 1; analyze_country_enter_cb(DNS_ERR_NOTEXIST, DNS_PTR, 0, 0, NULL, state); return; } } if (!checkpoint_doreplay) { struct in_addr in; in.s_addr = addr->addr_ip; evdns_resolve_reverse(&in, 0, analyze_country_enter_cb, state); } else { /* * If we are replaying a checkpoint, we do not want to do * async calls. */ struct hostent *hp = gethostbyaddr( (const char *)&addr->addr_ip, IP_ADDR_LEN, AF_INET); if (hp == NULL) { analyze_country_enter_cb(DNS_ERR_NOTEXIST, DNS_PTR, 0, 0, NULL, state); } else { analyze_country_enter_cb(DNS_ERR_NONE, DNS_PTR, 1, 1200 /* ttl */, (void *)&hp->h_name, state); } } } void analyze_os_enter(const struct addr *addr, const char *osfp) { struct keycount tmpkey, *key; tmpkey.key = osfp; tmpkey.keylen = strlen(osfp) + 1; if ((key = SPLAY_FIND(kctree, &oses, &tmpkey)) == NULL) { key = keycount_new(osfp, strlen(osfp) + 1, aux_create, aux_free); SPLAY_INSERT(kctree, &oses, key); } /* If the address is new, we are going to increase the counter */ if (aux_enter(key->auxilary, addr->addr_ip)) count_increment(key->count, 1); } void analyze_port_enter(uint16_t port, const struct addr *src, const struct addr *dst) { struct keycount tmpkey, *key; tmpkey.key = &port; tmpkey.keylen = sizeof(port); if ((key = SPLAY_FIND(kctree, &ports, &tmpkey)) == NULL) { key = keycount_new(&port, sizeof(port), aux_create, aux_free); SPLAY_INSERT(kctree, &ports, key); } /* If the address is new, we are going to increase the counter */ if (aux_enter(key->auxilary, port_hash(src, dst))) count_increment(key->count, 1); } void report_to_file(struct reporttree *tree, char *filename, char *(*print)(void *, size_t)) { static char line[1024]; FILE *fout; /* We do not create report files while we are replaying a checkpoint */ if (checkpoint_doreplay) return; /* create a temporary file */ strlcpy(line, filename, sizeof(line)); strlcat(line, ".tmp", sizeof(line)); if ((fout = fopen(line, "w")) != NULL) { report_print(tree, fout, print); fclose(fout); /* This is an atomic operation */ rename(line, filename); chmod(filename, S_IRWXU | S_IRGRP | S_IROTH); } else { warn("%s: fopen('%s')", __func__, line); } } void report_print(struct reporttree *tree, FILE *out, char *(*print)(void *, size_t)) { struct report *report; /* Now print the information in alphabetical order */ SPLAY_FOREACH(report, reporttree, tree) { fprintf(out, "%25s: %7d %7d %7d\n", print(report->key, report->keylen), report->minute, report->hour, report->day); } } void report_free(struct reporttree *tree) { struct report *report; /* And then free the whole tree */ while ((report = SPLAY_ROOT(tree)) != NULL) { SPLAY_REMOVE(reporttree, tree, report); free(report->key); free(report); } free(tree); } struct reporttree * report_create(struct kctree *kctree, void (*extract)(struct keycount *, void **, size_t *)) { struct reporttree *tree; struct report *report; struct keycount *kc, *next; if ((tree = calloc(1, sizeof(struct reporttree))) == NULL) err(1, "%s: calloc", __func__); SPLAY_INIT(tree); for (kc = SPLAY_MIN(kctree, kctree); kc != NULL; kc = next) { struct report tmp; uint32_t sum = 0; next = SPLAY_NEXT(kctree, kctree, kc); (*extract)(kc, &tmp.key, &tmp.keylen); if ((report = SPLAY_FIND(reporttree, tree, &tmp)) == NULL) { report = calloc(1, sizeof(struct report)); if (report == NULL) err(1, "%s: calloc"); report->key = tmp.key; report->keylen = tmp.keylen; SPLAY_INSERT(reporttree, tree, report); } /* Now get the data together */ if ((sum = count_get_minute(kc->count)) != 0) report->minute += sum; if ((sum += count_get_hour(kc->count)) != 0) report->hour += sum; if ((sum += count_get_day(kc->count)) != 0) report->day += sum; if (!sum) { SPLAY_REMOVE(kctree, kctree, kc); keycount_free(kc); } } return (tree); } void make_report(struct kctree *kctree, char *filename, void (*extract)(struct keycount *, void **, size_t *), char *(*print)(void *, size_t)) { struct reporttree *tree = report_create(kctree, extract); report_print(tree, stderr, print); if (filename != NULL) report_to_file(tree, filename, print); report_free(tree); } struct filterarg { struct reporttree *src; struct reporttree *dst; }; void analyze_filter_cb(void *reparg, void *treearg) { struct report *report = reparg; struct filterarg *fa = treearg; if (SPLAY_FIND(reporttree, fa->dst, report) != NULL) return; SPLAY_REMOVE(reporttree, fa->src, report); SPLAY_INSERT(reporttree, fa->dst, report); } void analyze_print_port_report() { struct reporttree *tree, *filtered_tree; struct filtertree *min_filters, *hour_filters, *day_filters; struct report *report; struct filterarg fa; tree = report_create(&ports, port_key_extract); /* Filter trees for Minutes, Hours and Days */ min_filters = filter_create(); hour_filters = filter_create(); day_filters = filter_create(); SPLAY_FOREACH(report, reporttree, tree) { filter_insert(min_filters, report->minute, report); filter_insert(hour_filters, report->hour, report); filter_insert(day_filters, report->day, report); } if ((filtered_tree = calloc(1, sizeof(struct reporttree))) == NULL) err(1, "%s: calloc"); SPLAY_INIT(filtered_tree); /* * Object passed to the call back function to merge the different * filter trees. */ fa.src = tree; fa.dst = filtered_tree; filter_top(min_filters, 5, analyze_filter_cb, &fa); filter_top(hour_filters,10, analyze_filter_cb, &fa); filter_top(day_filters, 15, analyze_filter_cb, &fa); filter_free(min_filters); filter_free(hour_filters); filter_free(day_filters); report_free(tree); fprintf(stderr, "Destination Port Statistics\n"); report_print(filtered_tree, stderr, port_key_print); if (port_report_file != NULL) report_to_file(filtered_tree, port_report_file, port_key_print); report_free(filtered_tree); } void analyze_print_spammer_report() { struct reporttree *tree, *filtered_tree; struct filtertree *min_filters, *hour_filters, *day_filters; struct report *report; struct filterarg fa; tree = report_create(&spammers, spammer_key_extract); /* Filter trees for Minutes, Hours and Days */ min_filters = filter_create(); hour_filters = filter_create(); day_filters = filter_create(); SPLAY_FOREACH(report, reporttree, tree) { filter_insert(min_filters, report->minute, report); filter_insert(hour_filters, report->hour, report); filter_insert(day_filters, report->day, report); } if ((filtered_tree = calloc(1, sizeof(struct reporttree))) == NULL) err(1, "%s: calloc"); SPLAY_INIT(filtered_tree); /* * Object passed to the call back function to merge the different * filter trees. */ fa.src = tree; fa.dst = filtered_tree; filter_top(min_filters, 5, analyze_filter_cb, &fa); filter_top(hour_filters,10, analyze_filter_cb, &fa); filter_top(day_filters, 20, analyze_filter_cb, &fa); filter_free(min_filters); filter_free(hour_filters); filter_free(day_filters); report_free(tree); fprintf(stderr, "Spammer Address Statistics\n"); report_print(filtered_tree, stderr, spammer_key_print); if (spammer_report_file != NULL) report_to_file(filtered_tree, spammer_report_file, spammer_key_print); report_free(filtered_tree); } void analyze_print_country_report() { struct reporttree *tree, *filtered_tree; struct filtertree *min_filters, *hour_filters, *day_filters; struct report *report; struct filterarg fa; tree = report_create(&countries, country_key_extract); /* Filter trees for Minutes, Hours and Days */ min_filters = filter_create(); hour_filters = filter_create(); day_filters = filter_create(); SPLAY_FOREACH(report, reporttree, tree) { filter_insert(min_filters, report->minute, report); filter_insert(hour_filters, report->hour, report); filter_insert(day_filters, report->day, report); } if ((filtered_tree = calloc(1, sizeof(struct reporttree))) == NULL) err(1, "%s: calloc"); SPLAY_INIT(filtered_tree); /* * Object passed to the call back function to merge the different * filter trees. */ fa.src = tree; fa.dst = filtered_tree; filter_top(min_filters, 5, analyze_filter_cb, &fa); filter_top(hour_filters,10, analyze_filter_cb, &fa); filter_top(day_filters, 20, analyze_filter_cb, &fa); filter_free(min_filters); filter_free(hour_filters); filter_free(day_filters); report_free(tree); fprintf(stderr, "Country Activity Statistics\n"); report_print(filtered_tree, stderr, country_key_print); if (country_report_file != NULL) report_to_file(filtered_tree, country_report_file, country_key_print); report_free(filtered_tree); } void analyze_print_report() { fprintf(stderr, "Operating System Statistics\n"); make_report(&oses, os_report_file, os_key_extract, os_key_print); analyze_print_port_report(); analyze_print_spammer_report(); analyze_print_country_report(); } void analyze_report_cb(int fd, short what, void *arg) { struct event *ev = arg; struct timeval tv; timerclear(&tv); tv.tv_sec = ANALYZE_REPORT_INTERVAL; evtimer_add(ev, &tv); analyze_print_report(); } #define OS_NUM_OSES 12 void os_test() { char *fingerprints[OS_NUM_OSES] = { "Linux", "Windows", "NetBSD", "OpenBSD", "Windows", "Windows", "Linux", "Windows", "Linux", "OpenBSD", "FreeBSD", "unknown" }; struct addr src; struct timeval tv; rand_t *rand = rand_open(); int i, j; gettimeofday(&tv, NULL); count_set_time(&tv); addr_pton("127.0.0.1", &src); for (i = 0; i < 24 * 60 * 2 + 60 * 4; i++) { for (j = 0; i < 24 * 60 * 2 && j < rand_uint8(rand) % 50000 + 5000; j++) { src.addr_ip = rand_uint32(rand); analyze_os_enter(&src, fingerprints[rand_uint8(rand) % OS_NUM_OSES]); } tv.tv_sec += 30; if (i % 120 == 0) { fprintf(stderr, "%ld:\n", tv.tv_sec); make_report(&oses, NULL, os_key_extract, os_key_print); } } if (SPLAY_ROOT(&oses) != NULL) errx(1, "oses fingerprints should have been purged"); count_set_time(NULL); rand_close(rand); fprintf(stderr, "\t%s: OK\n", __func__); } void analyze_test(void) { os_test(); }