/* ** Modular Logfile Analyzer ** Copyright 2000 Jan Kneschke ** ** Homepage: http://www.modlogan.org ** This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version, and provided that the above copyright and permission notice is included with all distributed copies of this or derived software. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA ** ** $Id: mstate.c,v 1.68 2004/08/27 20:07:37 ostborn Exp $ */ #include #include #include #include #include #include #include #include #include #include #undef MAX #undef MIN #include "config.h" #ifdef HAVE_LIBXML #include #include #endif #include "mlist.h" #include "mstate.h" #include "mdatatypes.h" #include "mlocale.h" #if 0 #define XML_DUMP 1 #endif #define STATE_FILE_VERSION "0.5" mstate *mstate_init() { mstate *state = malloc(sizeof(mstate)); assert(state); state->year = 0; state->month = 0; state->week = 0; state->timestamp = 0; state->ext = NULL; state->ext_type = M_STATE_TYPE_UNSET; /* not in the XML file * * just used to reduce the mem-usage */ return state; } void mstate_free(mstate *state) { if (!state) return; if (state->ext) { switch(state->ext_type) { case M_STATE_TYPE_WEB: mstate_free_web(state->ext); break; case M_STATE_TYPE_TELECOM: mstate_free_telecom(state->ext); break; case M_STATE_TYPE_TRAFFIC: mstate_free_traffic(state->ext); break; case M_STATE_TYPE_IPPL: mstate_free_ippl(state->ext); break; case M_STATE_TYPE_MAIL: mstate_free_mail(state->ext); break; case M_STATE_TYPE_UNSET: break; default: fprintf(stderr, "%s.%d: unknown substate type %d\n", __FILE__, __LINE__, state->ext_type); } } free(state); } mstate_web *mstate_init_web() { mstate_web *state = malloc(sizeof(mstate_web)); assert(state); memset(state, 0, sizeof(mstate_web)); state->visit_hash = mhash_init( 4 ); state->indexed_pages = mhash_init( 4 ); state->os_hash = mhash_init( 4 ); state->ua_hash = mhash_init( 4 /* 16 */); state->req_url_hash = mhash_init( 4 /* 128 */); state->req_prot_hash = mhash_init( 4 ); state->req_meth_hash = mhash_init( 4 ); state->status_hash = mhash_init( 4 ); state->host_hash = mhash_init( 4 /* 512 */ ); state->host_traffic = mhash_init( 4 /* 512 */ ); state->ref_url_hash = mhash_init( 4 /* 64 */ ); state->robots = mhash_init( 4 ); state->bookmarks = mhash_init( 4 ); state->status_internal_error = mhash_init( 4 ); state->status_missing_file = mhash_init( 4 /* 16 */); state->searchstring = mhash_init( 4 /* 32 */ ); state->searchsite = mhash_init( 4 ); state->country_hash = mhash_init( 4 ); state->extension = mhash_init( 4 ); state->visits = mhash_init( 4 /* 256 */); state->views = mhash_init( 4 /* 64 */); state->vhost_hash = mhash_init( 4 ); state->users = mhash_init( 4 ); state->location = mhash_init( 4 ); state->host_array = marray_init(); #if 0 #define D(x) \ fprintf(stderr, "%p = %s\n", state->x, #x); D(indexed_pages); D(os_hash ); D(ua_hash ); D(req_url_hash ); D(req_prot_hash ); D(req_meth_hash ); D(status_hash ); D(host_hash ); D(host_traffic ); D(ref_url_hash ); D(robots ); D(bookmarks ); D(status_internal_error ); D(status_missing_file ); D(searchstring ); D(searchsite ); D(country_hash ); D(extension ); D(visits ); D(views ); D(vhost_hash ); D(users ); #undef D #endif return state; } void mstate_free_web(mstate_web *state) { if (!state) return; #if 0 #define D(x) \ fprintf(stderr, "%p = %s - %d\n", state->x, #x, state->x->size); D(indexed_pages); D(os_hash ); D(ua_hash ); D(req_url_hash ); D(req_prot_hash ); D(req_meth_hash ); D(status_hash ); D(host_hash ); D(host_traffic ); D(ref_url_hash ); D(robots ); D(bookmarks ); D(status_internal_error ); D(status_missing_file ); D(searchstring ); D(searchsite ); D(country_hash ); D(extension ); D(visits ); D(views ); D(vhost_hash ); D(users ); #undef D #endif marray_free(state->host_array); mhash_free(state->os_hash); mhash_free(state->ua_hash); mhash_free(state->req_prot_hash); mhash_free(state->req_meth_hash); mhash_free(state->status_hash); mhash_free(state->host_hash); mhash_free(state->host_traffic); mhash_free(state->req_url_hash); mhash_free(state->ref_url_hash); mhash_free(state->bookmarks); mhash_free(state->robots); mhash_free(state->status_internal_error); mhash_free(state->status_missing_file); mhash_free(state->searchstring); mhash_free(state->searchsite); mhash_free(state->country_hash); mhash_free(state->indexed_pages); mhash_free(state->extension); mhash_free(state->visits); mhash_free(state->views); mhash_free(state->vhost_hash); mhash_free(state->visit_hash); mhash_free(state->users); mhash_free(state->location); free(state); } mstate_telecom *mstate_init_telecom() { int i; mstate_telecom *state = malloc(sizeof(mstate_telecom)); assert(state); state->called_numbers = mhash_init( 128 ); state->calling_numbers = mhash_init( 128 ); for (i = 0; i < 24; i++) { state->hours[i].incoming_calls = 0; state->hours[i].outgoing_calls = 0; } for (i = 0; i < 31; i++) { state->days[i].incoming_calls = 0; state->days[i].outgoing_calls = 0; } return state; } void mstate_free_telecom(mstate_telecom *state) { if (!state) return; mhash_free(state->called_numbers); mhash_free(state->calling_numbers); free(state); } mstate_mail *mstate_init_mail() { mstate_mail *state = malloc(sizeof(mstate_mail)); assert(state); memset(state, 0, sizeof(mstate_mail)); state->sender = mhash_init( 4 ); state->receipient = mhash_init( 4 ); state->send_domain = mhash_init( 4 ); state->recp_domain = mhash_init( 4 ); /* virus stuff */ state->virus = mhash_init( 4 ); state->subject = mhash_init( 4 ); state->scanner = mhash_init( 4 ); return state; } void mstate_free_mail(mstate_mail *state) { if (!state) return; mhash_free(state->sender); mhash_free(state->receipient); mhash_free(state->recp_domain); mhash_free(state->send_domain); mhash_free(state->virus); mhash_free(state->subject); mhash_free(state->scanner); free(state); } mstate_traffic *mstate_init_traffic() { mstate_traffic *state = malloc(sizeof(mstate_traffic)); assert(state); state->incoming = mhash_init( 128 ); state->outgoing = mhash_init( 128 ); state->external = mhash_init( 128 ); state->internal = mhash_init( 128 ); return state; } void mstate_free_traffic(mstate_traffic *state) { if (!state) return; mhash_free(state->incoming); mhash_free(state->outgoing); mhash_free(state->external); mhash_free(state->internal); free(state); } mstate_ippl *mstate_init_ippl() { mstate_ippl *state = malloc(sizeof(mstate_ippl)); assert(state); state->source_ips = mhash_init( 128 ); state->destination_ips = mhash_init( 128 ); state->source_ports = mhash_init( 128 ); state->destination_ports = mhash_init( 128 ); state->remote_idents = mhash_init( 32 ); state->protocol_names = mhash_init( 64 ); state->icmp_params = mhash_init( 16 ); state->watched_shosts = mhash_init( 128 ); state->watched_dports = mhash_init( 128 ); return state; } void mstate_free_ippl( mstate_ippl *state) { if (!state) return; mhash_free(state->source_ips); mhash_free(state->destination_ips); mhash_free(state->source_ports); mhash_free(state->destination_ports); mhash_free(state->remote_idents); mhash_free(state->protocol_names); mhash_free(state->icmp_params); mhash_free(state->watched_shosts); mhash_free(state->watched_dports); free(state); } int mhash_write(gzFile *fd, mhash *h) { int i; for (i = 0; i < h->size; i++) { if (h->data[i]->list) { mlist_write(fd, h->data[i]->list); } } return 0; } int mstate_write_web (mconfig *conf, mstate_web *state, gzFile *fd) { int i; gzprintf(fd, "<%s>\n", M_STATE_SECTION_WEB); gzprintf(fd, "<%s>\n", M_STATE_WEB_REQ_URL); mhash_write(fd, state->req_url_hash); gzprintf(fd, "\n", M_STATE_WEB_REQ_URL); gzprintf(fd, "<%s>\n", M_STATE_WEB_REQ_METHOD); mhash_write(fd, state->req_meth_hash); gzprintf(fd, "\n", M_STATE_WEB_REQ_METHOD); gzprintf(fd, "<%s>\n", M_STATE_WEB_REQ_PROTOCOL); mhash_write(fd, state->req_prot_hash); gzprintf(fd, "\n", M_STATE_WEB_REQ_PROTOCOL); gzprintf(fd, "<%s>\n", M_STATE_WEB_REF_URL); mhash_write(fd, state->ref_url_hash); gzprintf(fd, "\n", M_STATE_WEB_REF_URL); gzprintf(fd, "<%s>\n", M_STATE_WEB_OS); mhash_write(fd, state->os_hash); gzprintf(fd, "\n", M_STATE_WEB_OS); gzprintf(fd, "<%s>\n", M_STATE_WEB_USERAGENT); mhash_write(fd, state->ua_hash); gzprintf(fd, "\n", M_STATE_WEB_USERAGENT); gzprintf(fd, "<%s>\n", M_STATE_WEB_HOST); mhash_write(fd, state->host_hash); gzprintf(fd, "\n", M_STATE_WEB_HOST); gzprintf(fd, "<%s>\n", M_STATE_WEB_HOST_TRAFFIC); mhash_write(fd, state->host_traffic); gzprintf(fd, "\n", M_STATE_WEB_HOST_TRAFFIC); gzprintf(fd, "<%s>\n", M_STATE_WEB_STATUS); mhash_write(fd, state->status_hash); gzprintf(fd, "\n", M_STATE_WEB_STATUS); gzprintf(fd, "<%s>\n", M_STATE_WEB_VISIT); mhash_write(fd, state->visit_hash); gzprintf(fd, "\n", M_STATE_WEB_VISIT); gzprintf(fd, "<%s>\n", M_STATE_WEB_COUNTRIES); mhash_write(fd, state->country_hash); gzprintf(fd, "\n", M_STATE_WEB_COUNTRIES); gzprintf(fd, "<%s>\n", M_STATE_WEB_ROBOTS); mhash_write(fd, state->robots); gzprintf(fd, "\n", M_STATE_WEB_ROBOTS); gzprintf(fd, "<%s>\n", M_STATE_WEB_SEARCHSITE); mhash_write(fd, state->searchsite); gzprintf(fd, "\n", M_STATE_WEB_SEARCHSITE); gzprintf(fd, "<%s>\n", M_STATE_WEB_SEARCHSTRING); mhash_write(fd, state->searchstring); gzprintf(fd, "\n", M_STATE_WEB_SEARCHSTRING); gzprintf(fd, "<%s>\n", M_STATE_WEB_INT_ERROR); mhash_write(fd, state->status_internal_error); gzprintf(fd, "\n", M_STATE_WEB_INT_ERROR); gzprintf(fd, "<%s>\n", M_STATE_WEB_MISSING_FILE); mhash_write(fd, state->status_missing_file); gzprintf(fd, "\n", M_STATE_WEB_MISSING_FILE); gzprintf(fd, "<%s>\n", M_STATE_WEB_BOOKMARKS); mhash_write(fd, state->bookmarks); gzprintf(fd, "\n", M_STATE_WEB_BOOKMARKS); gzprintf(fd, "<%s>\n", M_STATE_WEB_INDEXED_PAGES); mhash_write(fd, state->indexed_pages); gzprintf(fd, "\n", M_STATE_WEB_INDEXED_PAGES); gzprintf(fd, "<%s>\n", M_STATE_WEB_EXTENSIONS); mhash_write(fd, state->extension); gzprintf(fd, "\n", M_STATE_WEB_EXTENSIONS); gzprintf(fd, "<%s>\n", M_STATE_WEB_VISITS); mhash_write(fd, state->visits); gzprintf(fd, "\n", M_STATE_WEB_VISITS); gzprintf(fd, "<%s>\n", M_STATE_WEB_VIEWS); mhash_write(fd, state->views); gzprintf(fd, "\n", M_STATE_WEB_VIEWS); gzprintf(fd, "<%s>\n", M_STATE_WEB_SRVHOST); mhash_write(fd, state->vhost_hash); gzprintf(fd, "\n", M_STATE_WEB_SRVHOST); gzprintf(fd, "<%s>\n", M_STATE_WEB_REQ_USERS); mhash_write(fd, state->users); gzprintf(fd, "\n", M_STATE_WEB_REQ_USERS); gzprintf(fd, "<%s>\n", M_STATE_WEB_LOCATION); mhash_write(fd, state->location); gzprintf(fd, "\n", M_STATE_WEB_LOCATION); gzprintf(fd, "<%s>\n", M_STATE_WEB_HOURS); for (i = 0; i < 24; i++) { gzprintf(fd, "\n", i); #define WR(x) \ gzprintf(fd, "<%s>%ld", #x, state->hours[i].x, #x); WR(hits); WR(files); WR(pages); WR(visits); WR(hosts); #undef WR #define WR(x) \ gzprintf(fd, "<%s>%.0f", #x, state->hours[i].x, #x); WR(xfersize); #undef WR gzprintf(fd, "\n"); } gzprintf(fd, "\n", M_STATE_WEB_HOURS); gzprintf(fd, "<%s>\n", M_STATE_WEB_DAYS); for (i = 0; i < 31; i++) { gzprintf(fd, "\n", i); #define WR(x) \ gzprintf(fd, "<%s>%ld", #x, state->days[i].x, #x); WR(hits); WR(files); WR(pages); WR(visits); WR(hosts); #undef WR #define WR(x) \ gzprintf(fd, "<%s>%.0f", #x, state->days[i].x, #x); WR(xfersize); #undef WR gzprintf(fd, "\n"); } gzprintf(fd, "\n", M_STATE_WEB_DAYS); gzprintf(fd, "\n", M_STATE_SECTION_WEB); return 0; } int mstate_write_mail (mconfig *conf, mstate_mail *state, gzFile *fd) { int i; gzprintf(fd, "<%s>\n", M_STATE_SECTION_MAIL); gzprintf(fd, "<%s>\n", M_STATE_MAIL_SENDER); mhash_write(fd, state->sender); gzprintf(fd, "\n", M_STATE_MAIL_SENDER); gzprintf(fd, "<%s>\n", M_STATE_MAIL_RECEIPIENT); mhash_write(fd, state->receipient); gzprintf(fd, "\n", M_STATE_MAIL_RECEIPIENT); gzprintf(fd, "<%s>\n", M_STATE_MAIL_SEND_DOMAIN); mhash_write(fd, state->send_domain); gzprintf(fd, "\n", M_STATE_MAIL_SEND_DOMAIN); gzprintf(fd, "<%s>\n", M_STATE_MAIL_RECP_DOMAIN); mhash_write(fd, state->recp_domain); gzprintf(fd, "\n", M_STATE_MAIL_RECP_DOMAIN); gzprintf(fd, "<%s>\n", M_STATE_MAIL_VIRUS); mhash_write(fd, state->virus); gzprintf(fd, "\n", M_STATE_MAIL_VIRUS); gzprintf(fd, "<%s>\n", M_STATE_MAIL_SCANNER); mhash_write(fd, state->scanner); gzprintf(fd, "\n", M_STATE_MAIL_SCANNER); gzprintf(fd, "<%s>\n", M_STATE_MAIL_SUBJECT); mhash_write(fd, state->subject); gzprintf(fd, "\n", M_STATE_MAIL_SUBJECT); gzprintf(fd, "<%s>\n", M_STATE_MAIL_HOURS); for (i = 0; i < 24; i++) { gzprintf(fd, "\n", i); #define WR(x) \ gzprintf(fd, "<%s>%ld\n", #x, state->hours[i].x, #x); WR(incoming_mails); WR(outgoing_mails); WR(incoming_bytes); WR(outgoing_bytes); #undef WR gzprintf(fd, "\n"); } gzprintf(fd, "\n", M_STATE_MAIL_HOURS); gzprintf(fd, "<%s>\n", M_STATE_MAIL_DAYS); for (i = 0; i < 31; i++) { gzprintf(fd, "\n", i); #define WR(x) \ gzprintf(fd, "<%s>%ld\n", #x, state->days[i].x, #x); WR(incoming_mails); WR(outgoing_mails); WR(incoming_bytes); WR(outgoing_bytes); #undef WR gzprintf(fd, "\n"); } gzprintf(fd, "\n", M_STATE_MAIL_DAYS); gzprintf(fd, "\n", M_STATE_SECTION_MAIL); return 0; } int mstate_write_ippl(mconfig *conf, mstate_ippl *state, gzFile *fd) { return 0; } int mstate_write_telecom (mconfig *conf, mstate_telecom *state, gzFile *fd) { return 0; } int mstate_write_traffic (mconfig *conf, mstate_traffic *state, gzFile *fd) { return 0; } int mstate_write (mconfig *conf, mstate *state, int _add_month, char *subpath) { char filename[255], *s; gzFile *fd; if (state == NULL) return -1; if (subpath) { /* protect the filename buffer */ if (strlen(conf->statedir) + strlen(subpath) > (sizeof(filename) - 48) ) { fprintf(stderr, "%s.%d: outputdir (%s) + subpath (%s) are too long\n", __FILE__, __LINE__, conf->statedir, subpath); return -1; } sprintf(filename, "%s/%s/", conf->statedir ? conf->statedir : ".", subpath); #if 0 /* take care of evil filenames */ if (realpath(filename,dirname) == NULL) { fprintf(stderr, "%s.%d: realpath failed on '%s' (%s): %s\n", __FILE__, __LINE__, filename, dirname, strerror(errno)); return -1; } /* make sure that we are still in the outputdir */ if (0 != strncmp(conf->statedir, dirname, strlen(conf->statedir))) { fprintf(stderr, "%s.%d: outputdir (%s) != (part) resulting dirname (%s)\n", __FILE__, __LINE__, conf->statedir, dirname); return -1; } #endif /* protect outputdir */ s = subpath; if (*s == '.' && *(s+1) == '.') { /* someone want to leave the outputdir */ fprintf(stderr, "%s.%d: subpath contains 2 dots (..) at the beginning: %s\n", __FILE__, __LINE__, subpath); return -1; } for (s = subpath; *s; s++) { if (*s == '/' && *(s+1) == '.' && *(s+2) == '.' && *(s+3) == '/') { fprintf(stderr, "%s.%d: subpath contains '/../': %s\n", __FILE__, __LINE__, subpath); return -1; } } mkdir(filename, 0755); if (_add_month == M_STATE_WRITE_DEFAULT) { sprintf(filename, "%s/%s/mla.state.xml", conf->statedir ? conf->statedir : ".", subpath); } else { sprintf(filename, "%s/%s/mla.state.xml.%04d%02d", conf->statedir ? conf->statedir : ".", subpath, state->year, state->month); } } else { if (_add_month == M_STATE_WRITE_DEFAULT) { sprintf(filename, "%s/mla.state.xml", conf->statedir ? conf->statedir : "."); } else { sprintf(filename, "%s/mla.state.xml.%04d%02d", conf->statedir ? conf->statedir : ".", state->year, state->month); } } if (conf->debug_level > 1) { fprintf(stderr, "%s.%d: STATE-Filename: %s\n", __FILE__, __LINE__, filename); } if ((fd = gzopen(filename, "wb")) == NULL) { fprintf(stderr, "%s.%d: can't open %s: %s\n", __FILE__, __LINE__, filename, strerror(errno)); return -1; } /* header */ gzprintf(fd, "\n"); gzprintf(fd, "\n"); gzprintf(fd, "\n", STATE_FILE_VERSION, PACKAGE); /* global values */ gzprintf(fd, "", state->week ? "weekly" : "monthly"); gzprintf(fd, "%ld", state->timestamp); gzprintf(fd, "%d", state->year); gzprintf(fd, "%d", state->month); gzprintf(fd, "%d", state->week); gzprintf(fd, "\n"); /* FIXME: input, processor, output */ if (state->ext) { switch(state->ext_type) { case M_STATE_TYPE_WEB: mstate_write_web(conf, state->ext, fd); break; case M_STATE_TYPE_TELECOM: mstate_write_telecom(conf, state->ext, fd); break; case M_STATE_TYPE_IPPL: mstate_write_ippl(conf, state->ext, fd); break; case M_STATE_TYPE_TRAFFIC: mstate_write_traffic(conf, state->ext, fd); break; case M_STATE_TYPE_MAIL: mstate_write_mail(conf, state->ext, fd); break; default: fprintf(stderr, "%s.%d: unknown substate type\n", __FILE__, __LINE__); } } else { if (conf->debug_level > 1) fprintf(stderr, "%s.%d: no state extension\n", __FILE__, __LINE__); } gzprintf(fd, "\n"); gzclose(fd); return 0; } int mstate_read_web (void *user_data, m_tag tagtype, const xmlChar *value, const xmlChar **attrs) { int i = 0; mdata_values state_values[] = { { M_STATE_WEB_REQ_URL, M_DATA_FIELDTYPE_HASH }, /* 0 */ { M_STATE_WEB_REQ_METHOD, M_DATA_FIELDTYPE_HASH }, /* 1 */ { M_STATE_WEB_REQ_PROTOCOL, M_DATA_FIELDTYPE_HASH }, /* 2 */ { M_STATE_WEB_REF_URL, M_DATA_FIELDTYPE_HASH }, /* 3 */ { M_STATE_WEB_OS, M_DATA_FIELDTYPE_HASH }, /* 4 */ { M_STATE_WEB_USERAGENT, M_DATA_FIELDTYPE_HASH }, /* 5 */ { M_STATE_WEB_STATUS, M_DATA_FIELDTYPE_HASH }, /* 6 */ { M_STATE_WEB_VISIT, M_DATA_FIELDTYPE_HASH }, /* 7 */ { M_STATE_WEB_HOURS, M_DATA_FIELDTYPE_WEB_ARRAY }, /* 8 */ { M_STATE_WEB_DAYS, M_DATA_FIELDTYPE_WEB_ARRAY }, /* 9 */ { M_STATE_WEB_COUNTRIES, M_DATA_FIELDTYPE_HASH }, /* 10 */ { M_STATE_WEB_ROBOTS, M_DATA_FIELDTYPE_HASH }, /* 11 */ { M_STATE_WEB_SEARCHSITE, M_DATA_FIELDTYPE_HASH }, /* 12 */ { M_STATE_WEB_SEARCHSTRING, M_DATA_FIELDTYPE_HASH }, /* 13 */ { M_STATE_WEB_INT_ERROR, M_DATA_FIELDTYPE_HASH }, /* 14 */ { M_STATE_WEB_MISSING_FILE, M_DATA_FIELDTYPE_HASH }, /* 15 */ { M_STATE_WEB_BOOKMARKS, M_DATA_FIELDTYPE_HASH }, /* 16 */ { M_STATE_WEB_HOST, M_DATA_FIELDTYPE_HASH }, /* 17 */ { M_STATE_WEB_INDEXED_PAGES, M_DATA_FIELDTYPE_HASH }, /* 18 */ { M_STATE_WEB_EXTENSIONS, M_DATA_FIELDTYPE_HASH }, /* 19 */ { M_STATE_WEB_VISITS, M_DATA_FIELDTYPE_HASH }, /* 20 */ { M_STATE_WEB_VIEWS, M_DATA_FIELDTYPE_HASH }, /* 21 */ { M_STATE_WEB_SRVHOST, M_DATA_FIELDTYPE_HASH }, /* 22 */ { M_STATE_WEB_REQ_USERS, M_DATA_FIELDTYPE_HASH }, /* 23 */ { M_STATE_WEB_LOCATION, M_DATA_FIELDTYPE_HASH }, /* 24 */ { M_STATE_WEB_HOST_TRAFFIC, M_DATA_FIELDTYPE_HASH }, /* 25 */ /* add new tags HERE and modify the wrap-around value in the switch() below */ /* support for old tags from 0.7.x */ { "req_url", M_DATA_FIELDTYPE_HASH }, /* 26 (wrap-around value)*/ { "req_meth", M_DATA_FIELDTYPE_HASH }, { "req_prot", M_DATA_FIELDTYPE_HASH }, { "ref_url", M_DATA_FIELDTYPE_HASH }, { "os", M_DATA_FIELDTYPE_HASH }, { "useragent", M_DATA_FIELDTYPE_HASH }, { "status", M_DATA_FIELDTYPE_HASH }, { "visit", M_DATA_FIELDTYPE_HASH }, { "hours", M_DATA_FIELDTYPE_WEB_ARRAY }, { "days", M_DATA_FIELDTYPE_WEB_ARRAY }, { "countries", M_DATA_FIELDTYPE_HASH }, { "robots", M_DATA_FIELDTYPE_HASH }, { "searchsite", M_DATA_FIELDTYPE_HASH }, { "searchstring", M_DATA_FIELDTYPE_HASH }, { "int_error", M_DATA_FIELDTYPE_HASH }, { "missing_file", M_DATA_FIELDTYPE_HASH }, { "bookmarks", M_DATA_FIELDTYPE_HASH }, { "host", M_DATA_FIELDTYPE_HASH }, { "indexed_pages", M_DATA_FIELDTYPE_HASH }, { "extensions", M_DATA_FIELDTYPE_HASH }, { "visit_path", M_DATA_FIELDTYPE_HASH }, { "views", M_DATA_FIELDTYPE_HASH }, { "srvhost", M_DATA_FIELDTYPE_HASH }, { NULL, M_DATA_FIELDTYPE_UNSET, } }; mstate_stack *m = user_data; // M_WP(); switch(tagtype) { case M_TAG_BEGIN: for (i = 0; state_values[i].string && (0 != strcmp(state_values[i].string, value)); i++) ; if (!state_values[i].string) return -1; #define SET_DATA(x) \ m->st[m->st_depth].data = (((mstate_web *)(m->st[m->st_depth-1].data))->x); /* wrap-around at 26 for the 0.7.x support */ switch (i % 26) { case 0: SET_DATA(req_url_hash); break; case 1: SET_DATA(req_meth_hash); break; case 2: SET_DATA(req_prot_hash); break; case 3: SET_DATA(ref_url_hash); break; case 4: SET_DATA(os_hash); break; case 5: SET_DATA(ua_hash); break; case 6: SET_DATA(status_hash); break; case 7: SET_DATA(visit_hash); break; case 8: m->st[m->st_depth].data = &(((mstate_web *)(m->st[m->st_depth-1].data))->hours); break; case 9: m->st[m->st_depth].data = &(((mstate_web *)(m->st[m->st_depth-1].data))->days); break; case 10: SET_DATA(country_hash); break; case 11: SET_DATA(robots); break; case 12: SET_DATA(searchsite);break; case 13: SET_DATA(searchstring);break; case 14: SET_DATA(status_internal_error);break; case 15: SET_DATA(status_missing_file);break; case 16: SET_DATA(bookmarks);break; case 17: SET_DATA(host_hash);break; case 18: SET_DATA(indexed_pages);break; case 19: SET_DATA(extension);break; case 20: SET_DATA(visits);break; case 21: SET_DATA(views);break; case 22: SET_DATA(vhost_hash);break; case 23: SET_DATA(users);break; case 24: SET_DATA(location);break; case 25: SET_DATA(host_traffic);break; default: return -1; } #undef SET_DATA m->st[m->st_depth].function = mdata_insert_value; m->st[m->st_depth].type = state_values[i].type; break; case M_TAG_END: #if 0 fprintf(stderr, "%s\n", value); M_WP(); #endif break; case M_TAG_TEXT: M_WP(); break; default: M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS, "can't handle tagtype '%d'\n", tagtype); } return 0; } int mstate_read_traffic (void *user_data, m_tag tagtype, const xmlChar *value, const xmlChar **attrs) { mstate_stack *m = user_data; int i = 0; mdata_values state_values[] = { { M_STATE_TRAFFIC_INCOMING, M_DATA_FIELDTYPE_HASH }, /* 0 */ { M_STATE_TRAFFIC_OUTGOING, M_DATA_FIELDTYPE_HASH }, /* 1 */ { M_STATE_TRAFFIC_EXTERNAL, M_DATA_FIELDTYPE_HASH }, /* 2 */ { M_STATE_TRAFFIC_INTERNAL, M_DATA_FIELDTYPE_HASH }, /* 3 */ { NULL, M_DATA_FIELDTYPE_UNSET, } }; // M_WP(); switch(tagtype) { case M_TAG_BEGIN: for (i = 0; state_values[i].string && (0 != strcmp(state_values[i].string, value)); i++) ; if (!state_values[i].string) return -1; #define SET_DATA(x) \ m->st[m->st_depth].data = (((mstate_traffic *)(m->st[m->st_depth-1].data))->x); switch (i) { case 0: SET_DATA(incoming); break; case 1: SET_DATA(outgoing); break; case 2: SET_DATA(external); break; case 3: SET_DATA(internal); break; default: return -1; } #undef SET_DATA m->st[m->st_depth].function = mdata_insert_value; m->st[m->st_depth].type = state_values[i].type; break; case M_TAG_END: M_WP(); break; case M_TAG_TEXT: M_WP(); break; default: M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS, "can't handle tagtype '%d'\n", tagtype); } return 0; } int mstate_read_telecom (void *user_data, m_tag tagtype, const xmlChar *value, const xmlChar **attrs) { mstate_stack *m = user_data; int i = 0; mdata_values state_values[] = { { M_STATE_TELECOM_CALLING, M_DATA_FIELDTYPE_HASH }, /* 0 */ { M_STATE_TELECOM_CALLED, M_DATA_FIELDTYPE_HASH }, /* 1 */ { M_STATE_TELECOM_HOURS, M_DATA_FIELDTYPE_TELECOM_ARRAY }, /* 2 */ { M_STATE_TELECOM_DAYS, M_DATA_FIELDTYPE_TELECOM_ARRAY }, /* 3 */ { NULL, M_DATA_FIELDTYPE_UNSET, } }; // M_WP(); switch(tagtype) { case M_TAG_BEGIN: for (i = 0; state_values[i].string && (0 != strcmp(state_values[i].string, value)); i++) ; if (!state_values[i].string) return -1; #define SET_DATA(x) \ m->st[m->st_depth].data = (((mstate_telecom *)(m->st[m->st_depth-1].data))->x); switch (i) { case 0: SET_DATA(calling_numbers); break; case 1: SET_DATA(called_numbers); break; case 2: m->st[m->st_depth].data = &(((mstate_telecom *)(m->st[m->st_depth-1].data))->hours); break; case 3: m->st[m->st_depth].data = &(((mstate_telecom *)(m->st[m->st_depth-1].data))->days); break; default: return -1; } #undef SET_DATA m->st[m->st_depth].function = mdata_insert_value; m->st[m->st_depth].type = state_values[i].type; break; case M_TAG_END: M_WP(); break; case M_TAG_TEXT: M_WP(); break; default: M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS, "can't handle tagtype '%d'\n", tagtype); } return 0; } int mstate_read_mail (void *user_data, m_tag tagtype, const xmlChar *value, const xmlChar **attrs) { mstate_stack *m = user_data; int i = 0; mdata_values state_values[] = { { M_STATE_MAIL_SENDER, M_DATA_FIELDTYPE_HASH }, /* 0 */ { M_STATE_MAIL_RECEIPIENT, M_DATA_FIELDTYPE_HASH }, /* 1 */ { M_STATE_MAIL_VIRUS, M_DATA_FIELDTYPE_HASH }, /* 2 */ { M_STATE_MAIL_SCANNER, M_DATA_FIELDTYPE_HASH }, /* 3 */ { M_STATE_MAIL_SUBJECT, M_DATA_FIELDTYPE_HASH }, /* 4 */ { M_STATE_MAIL_SEND_DOMAIN, M_DATA_FIELDTYPE_HASH }, /* 5 */ { M_STATE_MAIL_RECP_DOMAIN, M_DATA_FIELDTYPE_HASH }, /* 6 */ { M_STATE_MAIL_HOURS, M_DATA_FIELDTYPE_MAIL_ARRAY }, /* 7 */ { M_STATE_MAIL_DAYS, M_DATA_FIELDTYPE_MAIL_ARRAY }, /* 8 */ { NULL, M_DATA_FIELDTYPE_UNSET, } }; // M_WP(); switch(tagtype) { case M_TAG_BEGIN: for (i = 0; state_values[i].string && (0 != strcmp(state_values[i].string, value)); i++) ; if (!state_values[i].string) return -1; #define SET_DATA(x) \ m->st[m->st_depth].data = (((mstate_mail *)(m->st[m->st_depth-1].data))->x); switch (i) { case 0: SET_DATA(sender); break; case 1: SET_DATA(receipient); break; case 2: SET_DATA(virus); break; case 3: SET_DATA(scanner); break; case 4: SET_DATA(subject); break; case 5: SET_DATA(send_domain); break; case 6: SET_DATA(recp_domain); break; case 7: m->st[m->st_depth].data = &(((mstate_mail *)(m->st[m->st_depth-1].data))->hours); break; case 8: m->st[m->st_depth].data = &(((mstate_mail *)(m->st[m->st_depth-1].data))->days); break; default: return -1; } #undef SET_DATA m->st[m->st_depth].function = mdata_insert_value; m->st[m->st_depth].type = state_values[i].type; break; case M_TAG_END: M_WP(); break; case M_TAG_TEXT: M_WP(); break; default: M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS, "can't handle tagtype '%d'\n", tagtype); } return 0; } int mstate_read_global (void *user_data, m_tag tagtype, const xmlChar *value, const xmlChar **attrs) { int i; mstate_stack *m = user_data; const mdata_values state_values[] = { { "year", M_DATA_FIELDTYPE_LONG }, { "month", M_DATA_FIELDTYPE_LONG }, { "week", M_DATA_FIELDTYPE_LONG }, { "timestamp", M_DATA_FIELDTYPE_LONG }, { NULL, M_DATA_FIELDTYPE_UNSET } }; switch(tagtype) { case M_TAG_BEGIN: // M_WP(); for (i = 0; state_values[i].string && (0 != strcmp(state_values[i].string, value)); i++) ; if (!state_values[i].string) return -1; switch (i) { case 0: /* year */ m->st[m->st_depth].data = &(((mstate *)(m->st[m->st_depth-1].data))->year); break; case 1: /* month */ m->st[m->st_depth].data = &(((mstate *)(m->st[m->st_depth-1].data))->month); break; case 2: /* week */ m->st[m->st_depth].data = &(((mstate *)(m->st[m->st_depth-1].data))->week); break; case 3: /* timestamp */ m->st[m->st_depth].data = &(((mstate *)(m->st[m->st_depth-1].data))->timestamp); break; default: return -1; } /* the same for all 4 values */ m->st[m->st_depth].function = mdata_insert_value; m->st[m->st_depth].type = state_values[i].type; break; case M_TAG_END: #if 0 fprintf(stderr, "%s\n", value); M_WP(); #endif break; case M_TAG_TEXT: M_WP(); break; default: M_DEBUG1(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS, "can't handle tagtype '%d'\n", tagtype); } return 0; } #ifdef HAVE_LIBXML static xmlSAXHandler mlaSAXHandler = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }; #endif /** * process the start tags of the statefiles * * SAX callback for the XML starttags * */ void mstate_startElement(void *user_data, const xmlChar *name, const xmlChar **attrs) { int i; mstate_stack *m = user_data; enum { M_STATE_ST_UNSET, M_STATE_ST_WEB, M_STATE_ST_TELECOM, M_STATE_ST_TRAFFIC, M_STATE_ST_GLOBAL, M_STATE_ST_MAIL }; const mdata_values matches [] = { /* 0.8.0 and above */ { M_STATE_SECTION_WEB, M_STATE_ST_WEB }, { M_STATE_SECTION_TELECOM, M_STATE_ST_TELECOM }, { M_STATE_SECTION_TRAFFIC, M_STATE_ST_TRAFFIC }, { M_STATE_SECTION_GLOBAL, M_STATE_ST_GLOBAL }, { M_STATE_SECTION_MAIL, M_STATE_ST_MAIL }, /* 0.7.x */ { "web", M_STATE_ST_WEB }, { "telecom", M_STATE_ST_TELECOM }, { "traffic", M_STATE_ST_TRAFFIC }, { "global", M_STATE_ST_GLOBAL }, { "mail", M_STATE_ST_MAIL }, { NULL, M_STATE_ST_UNSET } }; // M_WP(); #ifdef XML_DUMP for (i = 0; i < m->st_depth; i++) { fprintf(stderr, " "); } fprintf(stderr, "> %s ", name); if (attrs) { for (i = 0; attrs[i]; i += 2) { fprintf(stderr, "%s=%s ", attrs[i], attrs[i+1]); } } fprintf(stderr, "\n"); #endif #if 0 fprintf(stderr, "stack: "); for (i = 0; i < m->st_depth+1; i++) { fprintf(stderr, "%p ", m->st[i].data); } fprintf(stderr, "\n"); #endif if (m->st_depth == 0) { /* state */ m->st[m->st_depth].function = NULL; m->st[m->st_depth].data = m->state; } else if (m->st_depth == 1) { /* main-sections: global, web, mail, ... */ for (i = 0; matches[i].string && (0 != strcmp(matches[i].string, name)); i++) ; if (!matches[i].string) return; switch(matches[i].type) { case M_STATE_ST_WEB: m->st[m->st_depth].function = mstate_read_web; m->st[m->st_depth].data = mstate_init_web(); break; case M_STATE_ST_GLOBAL: m->st[m->st_depth].function = mstate_read_global; m->st[m->st_depth].data = m->state; break; case M_STATE_ST_MAIL: m->st[m->st_depth].function = mstate_read_mail; m->st[m->st_depth].data = mstate_init_mail(); break; case M_STATE_ST_TELECOM: m->st[m->st_depth].function = mstate_read_telecom; m->st[m->st_depth].data = mstate_init_telecom(); break; case M_STATE_ST_TRAFFIC: m->st[m->st_depth].function = mstate_read_traffic; m->st[m->st_depth].data = mstate_init_traffic(); break; default: m->st[m->st_depth].function = NULL; m->st[m->st_depth].data = NULL; break; } m->st[m->st_depth].type = matches[i].type; } else { if ((*(m->st[m->st_depth-1].function))(m, M_TAG_BEGIN, name, attrs)) { fprintf(stderr, "%s failed on %s\n", __FUNCTION__, name); exit(-1); } } m->st_depth++; } /** * process the end tags of the statefiles * * SAX callback for the XML endtags * */ void mstate_endElement(void *user_data, const xmlChar *name) { mstate_stack *m = user_data; enum { M_STATE_ST_UNSET, M_STATE_ST_WEB, M_STATE_ST_TELECOM, M_STATE_ST_TRAFFIC, M_STATE_ST_GLOBAL, M_STATE_ST_MAIL }; #ifdef XML_DUMP int i; for (i = 0; i < m->st_depth-1; i++) { fprintf(stderr, " "); } fprintf(stderr, "< %s\n", name); #endif #if 0 if (0 == strcmp("global", name)) { fprintf(stderr, "< %s (%d)\n", name, m->st_depth); } #endif if (m->st_depth > 1) { if ((*(m->st[m->st_depth-1].function))(m, M_TAG_END, name, NULL)) { fprintf(stderr, "%s failed\n", __FUNCTION__); exit(-1); } } else if (m->st_depth == 1) { switch(m->st[m->st_depth].type) { case M_STATE_ST_WEB: ((mstate*)(m->state))->ext = m->st[m->st_depth].data; ((mstate*)(m->state))->ext_type = M_STATE_TYPE_WEB; break; case M_STATE_ST_GLOBAL: break; case M_STATE_ST_MAIL: ((mstate*)(m->state))->ext = m->st[m->st_depth].data; ((mstate*)(m->state))->ext_type = M_STATE_TYPE_MAIL; break; case M_STATE_ST_TELECOM: ((mstate*)(m->state))->ext = m->st[m->st_depth].data; ((mstate*)(m->state))->ext_type = M_STATE_TYPE_TELECOM; break; case M_STATE_ST_TRAFFIC: ((mstate*)(m->state))->ext = m->st[m->st_depth].data; ((mstate*)(m->state))->ext_type = M_STATE_TYPE_TRAFFIC; break; default: M_WP(); break; } } m->st_depth--; } /** * process the content (text( of the statefiles * * SAX callback for the XML content (!= tags) * */ void mstate_characters(void *user_data, const xmlChar *name, int len) { #ifdef XML_DUMP int i; #endif mstate_stack *m = user_data; if (*name == '\n') return; if (len + 1 > m->textbuf_size) { if (m->textbuf) free(m->textbuf); m->textbuf = malloc(len + 1); m->textbuf_size = len + 1; } strncpy(m->textbuf, name, len); m->textbuf[len] = '\0'; // M_WP(); #ifdef XML_DUMP for (i = 0; i < m->st_depth; i++) { fprintf(stderr, " "); } fprintf(stderr, "(%s)\n", s); #endif if (m->st_depth > 0) { if ((*(m->st[m->st_depth-1].function))(m, M_TAG_TEXT, m->textbuf, NULL)) { fprintf(stderr, "%s failed\n", __FUNCTION__); exit(-1); } } } int mstate_read (mconfig *conf, mstate *state, int year, int month, char *subpath) { char filename[255]; mstate_stack m; int i; #ifdef HAVE_EXPAT XML_Parser p; gzFile f; char buff[1024]; #endif if (year == 0 || month == 0) { sprintf(filename, "%s/%s/mla.state.xml", conf->statedir, subpath ? subpath : ""); } else { sprintf(filename, "%s/%s/mla.state.xml.%04d%02d", conf->statedir, subpath ? subpath : "", year, month ); } m.ext_conf = conf; m.state = state; m.st_depth = 0; m.textbuf = NULL; m.textbuf_size = 0; for (i = 0; i < M_STATE_ST_ELEM; i++) { m.st[i].function = NULL; m.st[i].data = NULL; m.st[i].type = -1; } #ifdef HAVE_LIBXML mlaSAXHandler.startElement = mstate_startElement; mlaSAXHandler.endElement = mstate_endElement; mlaSAXHandler.characters = mstate_characters; mlaSAXHandler.warning = xmlParserWarning; mlaSAXHandler.error = xmlParserError; mlaSAXHandler.fatalError = xmlParserError; xmlSAXUserParseFile(&mlaSAXHandler, &m, filename); #elif HAVE_EXPAT p = XML_ParserCreate(NULL); XML_SetElementHandler(p, mstate_startElement, mstate_endElement); XML_SetCharacterDataHandler(p, mstate_characters); XML_SetUserData(p, &m); if (NULL == (f = gzopen(filename, "rb"))) { return -1; } for (;;) { int bytes_read; bytes_read = gzread(f, buff, sizeof(buff)); if (bytes_read < 0) { /* handle error */ M_DEBUG0(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS, "gzread() failed\n" ); return -1; } if (! XML_Parse(p, buff, bytes_read, bytes_read == 0)) { /* handle parse error */ M_DEBUG3(M_DEBUG_LEVEL_ERRORS, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS, "XML: Line %d (Char %d): %s\n", XML_GetCurrentLineNumber(p), XML_GetCurrentColumnNumber(p), XML_ErrorString(XML_GetErrorCode(p))); return -1; } if (bytes_read == 0) break; } gzclose(f); XML_ParserFree(p); #else #error no xml parser #endif if (m.textbuf) free(m.textbuf); return 0; }