/* ** Modular Logfile Analyzer ** Copyright 2000 Jan Kneschke ** ** Homepage: http://www.modlogan.org ** This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version, and provided that the above copyright and permission notice is included with all distributed copies of this or derived software. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA ** ** $Id: mhash.c,v 1.23 2004/08/27 20:07:37 ostborn Exp $ */ #include #include #include #include #include #include "config.h" #include "mconfig.h" #include "mlist.h" #include "mhash.h" #include "mdatatypes.h" #define M_HASH_RESIZE_CHECK_DEFAULT 8 extern size_t mem_mhash_size; extern size_t mem_mhash_count; int mhash_unfold(mhash *h, mlist *l ) { int i = 0; for ( i = 0; i < h->size; i++) { if (h->data[i]->list) { mlist *hl; for (hl = h->data[i]->list; hl; hl = hl->next) { mdata *data; if (!hl->data) continue; data = mdata_copy(hl->data); mlist_insert(l, data); } } } return 0; } int mhash_unfold_sorted_limited(mhash *h, mlist *l, int count ) { int i, j, max; mdata *data = NULL, *ins_data; #if 0 fprintf(stderr, "%s.%d: h: %p, l: %p, count: %d\n", __FILE__, __LINE__, h, l, count); #endif for ( j = 0; j < count; j++) { data = NULL; max = 0; for ( i = 0; i < h->size; i++) { if (h->data[i]->list) { mlist *hl; hl = h->data[i]->list; while (hl) { if (hl->data) { if (mdata_get_count(hl->data) > max) { max = mdata_get_count(hl->data); data = hl->data; } } hl = hl->next; } } } if (data) { ins_data = mdata_copy(data); mlist_insert(l, ins_data); /* marking element as processed */ mdata_set_count(data, - mdata_get_count(data)); } } /* resetting count */ for ( i = 0; i < h->size; i++) { if (h->data[i]->list) { mlist *hl; hl = h->data[i]->list; while (hl) { if (hl->data) { if (mdata_get_count(hl->data) < 0) { data = hl->data; mdata_set_count(hl->data, - mdata_get_count(hl->data)); } } hl = hl->next; } } } return 0; } int mhash_unfold_sorted_limited_vcount(mhash *h, mlist *l, int count ) { int i, j; double max; mdata *data = NULL, *ins_data; #if 0 fprintf(stderr, "%s.%d: h: %p, l: %p, count: %d\n", __FILE__, __LINE__, h, l, count); #endif for ( j = 0; j < count; j++) { data = NULL; max = 0.0; for ( i = 0; i < h->size; i++) { if (h->data[i]->list) { mlist *hl; hl = h->data[i]->list; while (hl) { if (hl->data) { if (mdata_get_count(hl->data) > 0) { /* negative counts mark already processed values */ if (mdata_get_vcount(hl->data) > max) { max = mdata_get_vcount(hl->data); data = hl->data; } } } hl = hl->next; } } } if (data) { ins_data = mdata_copy(data); mlist_insert(l, ins_data); /* marking element as processed */ /* marking is done using count as well, because vcount is _unsigned_ */ mdata_set_count(data, - mdata_get_count(data)); } } /* resetting count */ for ( i = 0; i < h->size; i++) { if (h->data[i]->list) { mlist *hl; hl = h->data[i]->list; while (hl) { if (hl->data) { if (mdata_get_count(hl->data) < 0) { data = hl->data; mdata_set_count(hl->data, - mdata_get_count(hl->data)); } } hl = hl->next; } } } return 0; } int mhash_calc (mhash *h, const char *str) { unsigned long i = 0; while (*str) i = *str++ + 31 * i; return (i % h->size); } mhash *mhash_init (int size) { int i = 0; mhash *h; mem_mhash_count++; h = malloc(sizeof(mhash)); assert(h); memset(h, 0, sizeof(mhash)); mem_mhash_size += sizeof(mhash); h->size = size; h->data = malloc(sizeof(mhash_data *) * h->size); h->resize_check = M_HASH_RESIZE_CHECK_DEFAULT; mem_mhash_size += sizeof(mhash_data *) * h->size; for (i = 0; i < h->size; i++) { h->data[i] = malloc(sizeof(mhash_data)); assert(h->data[i]); h->data[i]->list = mlist_init(); h->data[i]->size = 0; } mem_mhash_size += sizeof(mhash_data) * h->size; return h; } int mhash_free (mhash *h) { int i = 0; for (i = 0; i < h->size; i++) { mlist_free(h->data[i]->list); free(h->data[i]); } free(h->data); free(h); return 0; } int mhash_resize(mhash *h, int newsize) { mhash *nh = mhash_init(newsize); int i = 0; for (i = 0; i < h->size; i++) { mlist *l; l = h->data[i]->list; for (l = h->data[i]->list; l && l->data; l = l->next) { int newndx; mdata *data = l->data; char *key = NULL; key = data->key; if (key == NULL) return -1; newndx = mhash_calc(nh, key); /* insert data into the new hash */ if (mlist_insert_sorted(nh->data[newndx]->list, data) == 1) { nh->data[newndx]->size++; } else { fprintf(stderr, "%s.%d: ??\n", __FILE__, __LINE__); } /* remove the pointer in the old list */ l->data = NULL; } /* remove the list */ mlist_free(h->data[i]->list); /* unset the list in the old hash */ h->data[i]->list = NULL; /* free the list-handle in the old hash */ free(h->data[i]); } /* free the data-handle in the old-hash */ free(h->data); /* move the new data-set and the new size to the old hash-structure */ h->data = nh->data; h->size = nh->size; h->resize_check = M_HASH_RESIZE_CHECK_DEFAULT; free(nh); return 0; } int mhash_insert_sorted(mhash *h, mdata *data) { int ndx = 0, i; char *key = NULL; /* set a upper limit for the hash size */ if (h->size <= 1024 && h->resize_check <= 0) { /* resize the hash if neccesary */ int highest = 0; for ( i = 0; i < h->size; i++) { if (h->data[i]->size >= 32) { mhash_resize(h, h->size * 2 + 1); break; } if (h->data[i]->size > highest) { highest = h->data[i]->size; } } if (i == h->size) { /* no resize happend */ /* set new resize-check */ h->resize_check = M_HASH_RESIZE_CHECK_DEFAULT; if ((32 - highest) > h->resize_check) { /* we have enough space to delay the resize-check even more */ h->resize_check = (32 - highest); } } else { int highest = 0; for ( i = 0; i < h->size; i++) { if (h->data[i]->size > highest) { highest = h->data[i]->size; } } if ((32 - highest) > h->resize_check) { /* we have enough space to delay the resize-check even more */ h->resize_check = (32 - highest); } } } else { } if (data == NULL) return -1; key = data->key; if (key == NULL) return -1; ndx = mhash_calc(h, key); if (mlist_insert_sorted(h->data[ndx]->list, data) == 1) { h->data[ndx]->size++; h->resize_check--; } else { #if 0 if (mlist_count(h->data[ndx]->list) != h->data[ndx]->size) fprintf(stderr, "%s.%d: append (%d == %d) ??\n", __FILE__, __LINE__, mlist_count(h->data[ndx]->list), h->data[ndx]->size); #endif } return 0; } int mhash_insert_replace(mhash *h, mdata *data) { int ndx = 0; mlist *l; if (data == NULL) return -1; if (data->key == NULL) return -1; ndx = mhash_calc(h, data->key); for (l = h->data[ndx]->list; l && l->data; l = l->next) { if (0 == strcmp(l->data->key, data->key)) { /* found */ mdata_free(l->data); l->data = data; break; } } if (l == NULL || l->data == NULL) { if (mlist_insert_sorted(h->data[ndx]->list, data) == 1) { h->data[ndx]->size++; } } return 0; } int mhash_count(mhash *h) { int i, c = 0; if (!h) return 0; for ( i = 0; i < h->size; i++) { #if 0 c += mlist_count(h->data[i]->list); #else c += h->data[i]->size; #endif } return c; } mdata *mhash_get_data(mhash *h, const char *str) { int ndx; ndx = mhash_calc(h, str); return mlist_get_data(h->data[ndx]->list, str); } int mhash_in_hash(mhash *h, const char *str) { int ndx; ndx = mhash_calc(h, str); return mlist_in_list(h->data[ndx]->list, str); } mdata **mhash_sorted_to_marray(const mhash *h, int sortby, int sortdir) { int j = 0; /* number of data-elements */ int i; mdata **md, **r_md; int *a, *b; /* count the data elements */ for ( i = 0; i < h->size; i++) { if (h->data[i]->list) { mlist *hl; for (hl = h->data[i]->list; hl; hl = hl->next) { if (hl->data) { j++; } } } } /* unfold the hash */ md = malloc(sizeof(mdata *) * j); /* add one for the NULL termination */ r_md = malloc(sizeof(mdata *) * (j + 1)); a = malloc(sizeof(int) * j); b = malloc(sizeof(int) * j); j = 0; for ( i = 0; i < h->size; i++) { if (h->data[i]->list) { mlist *hl; for (hl = h->data[i]->list; hl; hl = hl->next) { if (hl->data) { a[j] = j; md[j++] = hl->data; } } } } /* sort */ mdata_array_msort(md, a, b, 0, j-1, sortby, sortdir); /* 'a' contains the sorted indexes */ /* copy the data-element into the returned array */ for (i = 0; i < j; i++) r_md[i] = md[a[i]]; r_md[i] = NULL; free(a); free(b); free(md); return r_md; } int mhash_get_value(mhash *h, const char *key) { int i; if (!h) return 0; for ( i = 0; i < h->size; i++) { mlist *l; for (l = h->data[i]->list; l && l->data; l = l->next) { mdata *data = l->data; if (0 == strcmp(key, data->key)) { return mdata_get_count(data); } } } return 0; } long mhash_sumup(mhash *h) { int i, c = 0; if (!h) return 0; for ( i = 0; i < h->size; i++) { c += mlist_sumup(h->data[i]->list); } return c; } double mhash_sumup_vcount(mhash *h) { int i; double c = 0; if (!h) return 0; for ( i = 0; i < h->size; i++) { c += mlist_sumup_vcount(h->data[i]->list); } return c; }