#include "my-stdlib.h" #include "log.h" #include "storage.h" #include "str_intern.h" #include "utils.h" struct intern_entry { const char *s; unsigned hash; struct intern_entry *next; }; /* If we're using the intern table during db load, we have a stunning * opportunity to fragment memory with little intern_entry * structures. So, the inevitable suballocator. * * On Linux (at least) malloc() calls past a certain size are * converted to mmap() allocations. That's really nice since we * don't bloat sbrk() with memory we'll never use past db load. * That makes us look better in /usr/bin/top. */ struct intern_entry_hunk { int size; int handout; struct intern_entry *contents; struct intern_entry_hunk *next; }; static struct intern_entry_hunk *intern_alloc = NULL; static struct intern_entry_hunk * new_intern_entry_hunk(int size) { struct intern_entry_hunk *new; new = mymalloc(sizeof(struct intern_entry_hunk), M_INTERN_HUNK); new->size = size; new->handout = 0; new->contents = mymalloc(sizeof(struct intern_entry) * size, M_INTERN_ENTRY); new->next = NULL; return new; } /* Chosen large enough to trigger the mmap() semantics of linux malloc. */ #define INTERN_ENTRY_HUNK_SIZE 100000 static struct intern_entry * allocate_intern_entry(void) { if (intern_alloc == NULL) { intern_alloc = new_intern_entry_hunk(INTERN_ENTRY_HUNK_SIZE); } if (intern_alloc->handout < intern_alloc->size) { struct intern_entry *e; e = &(intern_alloc->contents[intern_alloc->handout]); intern_alloc->handout++; return e; } else { struct intern_entry_hunk *new_hunk = new_intern_entry_hunk(INTERN_ENTRY_HUNK_SIZE); new_hunk->next = intern_alloc; intern_alloc = new_hunk; return allocate_intern_entry(); } } static void free_intern_entry_hunks(void) { struct intern_entry_hunk *h, *next; for (h = intern_alloc; h; h = next) { next = h->next; free(h->contents); free(h); } intern_alloc = NULL; } /**********************/ static struct intern_entry **intern_table; static int intern_table_size = 0; static int intern_table_count = 0; static int intern_bytes_saved = 0; static int intern_allocations_saved = 0; #define INTERN_TABLE_SIZE_INITIAL 10007 static struct intern_entry ** make_intern_table(int size) { struct intern_entry **table; int i; table = mymalloc(sizeof(struct intern_entry *) * size, M_INTERN_POINTER); for (i = 0; i < size; i++) { table[i] = NULL; } return table; } void str_intern_open(int table_size) { if (table_size == 0) { table_size = INTERN_TABLE_SIZE_INITIAL; } intern_table = make_intern_table(table_size); intern_table_size = table_size; intern_bytes_saved = 0; intern_allocations_saved = 0; } extern void str_intern_close(void) { int i; struct intern_entry *e, *next; for (i = 0; i < intern_table_size; i++) { for (e = intern_table[i]; e; e = next) { next = e->next; free_str(e->s); /* myfree(e, M_INTERN_ENTRY); */ } } myfree(intern_table, M_INTERN_POINTER); intern_table = NULL; free_intern_entry_hunks(); oklog("INTERN: %d allocations saved, %d bytes\n", intern_allocations_saved, intern_bytes_saved); oklog("INTERN: at end, %d entries in a %d bucket hash table.\n", intern_table_count, intern_table_size); } static struct intern_entry * find_interned_string(const char *s, unsigned hash) { int bucket = hash % intern_table_size; struct intern_entry *p; for (p = intern_table[bucket]; p; p = p->next) { if (hash == p->hash) { if (!strcmp(s, p->s)) { return p; } } } return NULL; } /* Caller must addref s */ static void add_interned_string(const char *s, unsigned hash) { int bucket = hash % intern_table_size; struct intern_entry *p; /* p = mymalloc(sizeof(struct intern_entry), M_INTERN_ENTRY); */ p = allocate_intern_entry(); p->s = s; p->hash = hash; p->next = intern_table[bucket]; intern_table[bucket] = p; intern_table_count++; } static void intern_rehash(int new_size) { struct intern_entry **new_table; int i, count; struct intern_entry *e, *next; count = 0; new_table = make_intern_table(new_size); for (i = 0; i < intern_table_size; i++) { for (e = intern_table[i]; e; e = next) { int new_bucket = e->hash % new_size; /* Keep the next pointer, since we're gonna nuke it. */ next = e->next; e->next = new_table[new_bucket]; new_table[new_bucket] = e; count++; } } if (count != intern_table_count) { errlog("counted %d entries in intern hash table, but intern_table_count says %d!\n", count, intern_table_count); } intern_table_size = new_size; myfree(intern_table, M_INTERN_POINTER); intern_table = new_table; } /* Make an immutable copy of s. If there's an intern table open, possibly share storage. */ extern const char * str_intern(const char *s) { struct intern_entry *e; unsigned hash; const char *r; if (s == NULL || *s == '\0') { /* str_dup already has a canonical empty string */ return str_dup(s); } if (intern_table == NULL) { return str_dup(s); } hash = str_hash(s); e = find_interned_string(s, hash); if (e != NULL) { intern_allocations_saved++; intern_bytes_saved += strlen(s); return str_ref(e->s); } if (intern_table_count > intern_table_size) { intern_rehash(intern_table_size * 2); } r = str_dup(s); r = str_ref(r); add_interned_string(r, hash); return r; }