/* ** Modular Logfile Analyzer ** Copyright 2000 Jan Kneschke ** ** Homepage: http://www.modlogan.org ** This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version, and provided that the above copyright and permission notice is included with all distributed copies of this or derived software. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA ** ** $Id: plugin_config.c,v 1.44 2004/08/27 18:38:49 ostborn Exp $ */ #include #include #include #include #include #include #include #include "mlocale.h" #include "mplugins.h" #include "mrecord.h" #include "mdatatypes.h" #include "misc.h" #include "plugin_config.h" int mplugins_input_clf_dlinit(mconfig *ext_conf) { config_input *conf = NULL; #ifdef USE_REGEX_VERSIONS const char *errptr; int erroffset = 0; #endif int i; if (0 != strcmp(ext_conf->version, VERSION)) { M_DEBUG2(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS, "version string doesn't match: (mla) %s != (plugin) %s\n", ext_conf->version, VERSION); return -1; } conf = malloc(sizeof(config_input)); memset(conf, 0, sizeof(config_input)); conf->match_useragent = mlist_init(); conf->record_list = mlist_init(); conf->inputfilename = NULL; conf->dont_strip_hostname = 0; /* will be replaced by setdefaults if we have a real inputfilename */ conf->buf = buffer_init(); for (i = 0; i < UA_CACHE_SIZE; i++) { conf->ua_cache[i].timestamp = 0; conf->ua_cache[i].key = NULL; conf->ua_cache[i].ua_os = NULL; } conf->match_clf = NULL; conf->match_clf_squid = NULL; conf->match_clf_extended = NULL; conf->match_clf_extra = NULL; conf->match_clf_squid_extra = NULL; conf->match_clf_extended_extra = NULL; ext_conf->plugin_conf = conf; return 0; } int mplugins_input_clf_dlclose(mconfig *ext_conf) { config_input *conf = ext_conf->plugin_conf; int i; if (conf->inputfilename) free(conf->inputfilename); mclose(&(conf->inputfile)); mlist_free(conf->match_useragent); mlist_free(conf->record_list); if (conf->match_clf) pcre_free(conf->match_clf); buffer_free(conf->buf); if (conf->format) free(conf->format); for (i = 0; i < UA_CACHE_SIZE; i++) { if (conf->ua_cache[i].key) free(conf->ua_cache[i].key); if (conf->ua_cache[i].ua_os) free(conf->ua_cache[i].ua_os); } free(ext_conf->plugin_conf); ext_conf->plugin_conf = NULL; return 0; } int mplugins_input_clf_parse_config(mconfig *ext_conf, const char *filename, const char *section) { config_input *conf = ext_conf->plugin_conf; const mconfig_values config_values[] = { {"match_useragent", M_CONFIG_TYPE_SUBSTITUTE, M_CONFIG_VALUE_APPEND, &(conf->match_useragent)}, {"inputfile", M_CONFIG_TYPE_STRING, M_CONFIG_VALUE_OVERWRITE, &(conf->inputfilename)}, {"format", M_CONFIG_TYPE_STRING, M_CONFIG_VALUE_OVERWRITE, &(conf->format)}, {"dont_strip_hostname", M_CONFIG_TYPE_INT, M_CONFIG_VALUE_OVERWRITE, (&conf->dont_strip_hostname)}, {NULL, M_CONFIG_TYPE_INT, 0, NULL} }; return mconfig_parse_section(ext_conf, filename, section, config_values); } typedef struct { char *field; int id; char *match; } clf_field_def; static const clf_field_def def[] = /* %a Remote IP-address - handled %A Local IP-address - not handled %b Bytes sent, excluding HTTP headers. - handled %B Bytes sent, excluding HTTP headers. - not handled %{FOOBAR}e - not handled The contents of the environment variable FOOBAR %D The time taken to serve the - handled request, in microseconds. %f Filename - not handled %h Remote host - handled %H The request protocol - not handled %{FOOBAR}i - partly handled The contents of Foobar: header line(s) in the request sent to the server %l Remote logname - known, but not handled (from identd, if supplied) %m The request method %{Foobar}n The contents of note "Foobar" - not handled from another module. %{Foobar}o The contents of Foobar: header - not handled line(s) in the reply. %p The canonical Port of the - handled server serving the request %P The process ID of the child - not handled that serviced the request. %q The query string (prepended - not handled with a ? if a query string exists otherwise an empty string) %r First line of request - handled %s Status. For requests that got - only '%>s' is handled internally redirected, this is the status of the *original* request --- %...>s for the last. %t Time, in common log format time - handled format (standard english format) %{format}t - not handled The time, in the form given by format, which should be in strftime(3) format. (potentially localised) %T The time taken to serve the - not handled request, in seconds. %u Remote user (from auth; may be - handled bogus if return status (%s) is 401) %U The URL path requested. - handled %v The canonical ServerName of the - handled server serving the request. %V The server name according to - handled the UseCanonicalName setting. */ { { "%h", M_CLF_FIELD_REQ_HOST, "(.+?)"}, { "%l", M_CLF_FIELD_AUTH_USERNAME, "(.+?)"}, { "%u", M_CLF_FIELD_USERNAME, "(.+?)"}, { "%t", M_CLF_FIELD_TIMESTAMP, "\\[([0-9]{2}/[A-Z][a-z]{2}/[0-9]{4}:[0-9]{2}:[0-9]{2}:[0-9]{2} [-+][0-9]{4})\\]"}, { "%r", M_CLF_FIELD_REQUEST, "(.+?)"}, { "%>s", M_CLF_FIELD_STATUS, "([0-9]{1,3})"}, { "%b", M_CLF_FIELD_BYTES_SEND, "([-0-9]+)"}, { "%{User-Agent}i", M_CLF_FIELD_USER_AGENT, "(.*?)"}, { "%{Referer}i", M_CLF_FIELD_REFERRER, "(.*?)"}, { "%T", M_CLF_FIELD_DURATION, "([-0-9]+)"}, { "%D", M_CLF_FIELD_DURATION_US, "([-0-9]+)"}, { "%v", M_CLF_FIELD_SERVER_IP, "(.+)"}, { "%V", M_CLF_FIELD_SERVER_IP, "((?i)[a-z0-9][-.a-z0-9]+[a-z0-9]\\.[a-z0-9]{2,4})"}, { "%p", M_CLF_FIELD_SERVER_PORT, "([-0-9]+)"}, { "%a", M_CLF_FIELD_REMOTE_IP, "([.0-9]+)"}, { "%c", M_CLF_FIELD_CONNECTION_STATUS, "([-+X])"}, { NULL, 0, NULL} }; int parse_clf_field_info(mconfig *ext_conf, const char *logformat) { config_input *conf = ext_conf->plugin_conf; const char *s; const char *errptr; int erroffset = 0; enum {KEY, NOKEY} state = NOKEY; enum {KEY_PLAIN, KEY_BRACES} sub_state = KEY_PLAIN; char buf[255] = ""; char regex_buf[1024] = "^"; int buf_i = 0, i = 0, pos = 0; /* %[a-z] is a key * %{.*}[a-z] is a key * the rest are character that are static */ for (s = logformat; *s; s++) { switch (state) { case NOKEY: if (*s == '%') { buf[buf_i] = '\0'; #if 0 fprintf(stderr, "non-key: %s\n", buf); #endif strcat(regex_buf, buf); /* start the key */ state = KEY; buf_i = 0; buf[buf_i++] = *s; } else { if (*s == '.' || *s == '(' || *s == ')' || *s == '[' || *s == ']') { buf[buf_i++] = '\\'; } buf[buf_i++] = *s; } break; case KEY: switch (sub_state) { case KEY_PLAIN: if ((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z')) { /* finish key */ buf[buf_i++] = *s; buf[buf_i] = '\0'; #if 0 fprintf(stderr, "key: %s\n", buf); #endif for (i = 0; def[i].field != NULL; i++) { if (strncmp(def[i].field, buf, buf_i) == 0) { break; } } if (def[i].field) { if (pos >= M_CLF_MAX_FIELDS) { fprintf(stderr, "pos >= M_CLF_MAX_FIELDS\n"); return -1; } /* set field type */ conf->trans_fields[pos++] = def[i].id; strcat(regex_buf, def[i].match); } else { conf->trans_fields[pos++] = M_CLF_FIELD_UNSET; strcat(regex_buf, "([-_:0-9a-zA-Z]+)"); M_DEBUG1(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS, "Unknown fieldtype: %s\n", buf); } /* set the new non-key buf */ buf_i = 0; state = NOKEY; } else if ( *s == '>' ) { buf[buf_i++] = *s; } else if (*s == '{') { buf[buf_i++] = *s; sub_state = KEY_BRACES; } else { fprintf(stderr, "character not allowed outside of {...}: %c\n", *s); return -1; } break; case KEY_BRACES: if ((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') || (*s >= '0' && *s <= '9') || *s == '_' || *s == '-') { buf[buf_i++] = *s; } else if (*s == '}') { buf[buf_i++] = *s; sub_state = KEY_PLAIN; } else { fprintf(stderr, "character not allowed between {...}: %c\n", *s); return -1; } break; } break; } } buf[buf_i] = '\0'; strcat(regex_buf, buf); strcat(regex_buf, "$"); fprintf(stderr, "regex_buf: %s\n", regex_buf); M_DEBUG1(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_VERBOSE, "match = %s\n", regex_buf); if ((conf->match_clf = pcre_compile(regex_buf, 0, &errptr, &erroffset, NULL)) == NULL) { M_DEBUG1(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS, "regexp compilation error at %s\n", errptr); return -1; } conf->match_clf_extra = pcre_study(conf->match_clf, 0, &errptr); if (errptr != NULL) { M_DEBUG1(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS, "regexp studying error at %s\n", errptr); return -1; } return 0; } int mplugins_input_clf_set_defaults(mconfig *ext_conf) { config_input *conf = ext_conf->plugin_conf; if (conf->inputfilename && strcmp(conf->inputfilename, "-") != 0) { if (mopen(&(conf->inputfile), conf->inputfilename)) { M_DEBUG2(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS, "%s: %s\n", conf->inputfilename, strerror(errno)); return -1; } M_DEBUG1(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_VERBOSE, "(clf) using %s as inputfile\n", conf->inputfilename); } else { /* stdin */ if (mopen(&(conf->inputfile), NULL)) { M_DEBUG2(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS, "%s: %s\n", conf->inputfilename, strerror(errno)); return -1; } M_DEBUG0(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_VERBOSE, "(clf) using (stdin) as inputfile\n"); } if (!conf->format) { const char *errptr; int erroffset = 0; if ((conf->match_clf = pcre_compile( "^(.+?) (.+?) (.+?) " "\\[([0-9]{2}/[A-Z][a-z]{2}/[0-9]{4}:[0-9]{2}:[0-9]{2}:[0-9]{2} [-+][0-9]{4})\\] " "\"(.+?)\" ([-0-9]{1,3}) ([-0-9]+)( \"(.*?)\" \"(.*?)\"| ([A-Z:_]+?)|)\\s*$", 0, &errptr, &erroffset, NULL)) == NULL) { M_DEBUG1(ext_conf->debug_level, M_DEBUG_SECTION_INIT, M_DEBUG_LEVEL_ERRORS, "regexp compilation error at %s\n", errptr); return -1; } } else { if (parse_clf_field_info(ext_conf, conf->format)) { return -1; } } if (!conf->dont_strip_hostname) { conf->dont_strip_hostname = 0; } return 0; } int mplugins_init(mplugin *func) { func->dlinit = mplugins_input_clf_dlinit; func->dlclose = mplugins_input_clf_dlclose; func->parse_config = mplugins_input_clf_parse_config; func->set_defaults = mplugins_input_clf_set_defaults; func->get_next_record = mplugins_input_clf_get_next_record; func->insert_record = NULL; func->gen_report = NULL; func->gen_history = NULL; return 0; }