/* FuzzDict:
*
* A Dict with fuzzy matching. "fuzzy" means will match shortest
* unique prefix. Need to find a better name.
*/
#define _CLASS_FuzzDict_PRIVATE_
#include "fuzzdict.h"
typedef dict_key_t Key_t;
typedef dict_value_t Value_t;
#define SUPER_new() ((FuzzDict) dict_new ())
#define SUPER_dispose(S) ((FuzzDict) dict_dispose ((Dict)S))
#define SUPER_add(S, K, V) dict_add ((Dict)(S), K, V)
#define SUPER_ignoreCase(S,Y) dict_ignoreCase ((Dict)S, Y)
#define SUPER_find(S, K) dict_find ((Dict)(S), K)
#define SUPER_remove(S, K) dict_remove ((Dict)(S), K)
#define SUPER_keys(S) dict_keys ((Dict)(S))
extern int strcmp(), strcasecmp(), strncmp(), strncasecmp();
/* Private function declarations */
#ifdef __STDC__
static int strsortcmp(
char** s1,
char** s2
);
static int strsortcasecmp(
char** s1,
char** s2
);
static Key_t fuzzdict_bsearch(
FuzzDict self,
Key_t key
);
#else
static int strsortcmp(), strsortcasecmp();
static Key_t fuzzdict_bsearch();
#endif
/* Instance Creation, Deletion:
*
* new(), dispose()
*/
/* Create and return handle on new FuzzDict.
*
* We call the superclass new() function and reallocate
* the result up to our larger size.
*/
FuzzDict
fuzzdict_new ()
{
FuzzDict self;
if ((self = SUPER_new()) == 0)
return 0;
self = (FuzzDict)realloc (self, sizeof *self);
self->keylist = 0;
self->keylistValid = 0;
self->sort = strsortcasecmp;
self->compare = strncasecmp;
return self;
}
/* Discard this instance of FuzzDict
*/
FuzzDict
fuzzdict_dispose (self)
FuzzDict self;
{
if (self == 0)
return 0;
if (self->keylist)
free (self->keylist);
return SUPER_dispose (self);
}
/* Accessing:
*
* add(), remove(), find()
*/
/* The add and remove functions simply invalidate the sorted list
* and call the superclass functions.
*/
Value_t
fuzzdict_add (self, key, value)
FuzzDict self;
Key_t key;
Value_t value;
{
Value_t rslt;
if ((rslt = SUPER_add (self, key, value)) == 0)
self->keylistValid = 0;
return rslt;
}
Value_t
fuzzdict_remove (self, key)
FuzzDict self;
Key_t key;
{
Value_t rslt;
if (rslt = SUPER_remove (self, key))
self->keylistValid = 0;
return rslt;
}
/* All the above are just pro-forma wrappers.
* The find function is the real meat of this class
*/
Value_t
fuzzdict_find (self, key)
FuzzDict self;
Key_t key;
{
Value_t ret;
/* Do we have an exact match? */
if (ret = SUPER_find (self, key))
return ret;
/*
* See if we can find a unique match on the prefix.
*/
if (key = fuzzdict_bsearch (self, key))
return SUPER_find (self, key);
/* Can't find it */
return 0;
}
/* Miscellaneous
*
* ignoreCase()
*/
/* Set case matching behavior.
* yesno = 1: ignore case distinctions.
* yesno = 0: respect case distinctions.
*/
int
fuzzdict_ignoreCase (self, yesno)
FuzzDict self;
int yesno;
{
int ret;
/* If the superclass function returns an error, we don't
* need to change anything.
*
* Otherwise, alter the sort and comparison functions
* appropriately.
*/
if ((ret = SUPER_ignoreCase (self, yesno)) >= 0) {
self->keylistValid = 0;
if (yesno) {
self->sort = strsortcasecmp;
self->compare = strncasecmp;
}
else {
self->sort = strsortcmp;
self->compare = strncmp;
}
}
return ret;
}
/* Private functions:
*
* strsortcmp(), strsortcasecmp(), bsearch()
*/
/* Sort comparison functions. We only need these because
* qsort passes the address of a list datum, and we need to
* dereference that one level.
*/
static int
strsortcmp (s1, s2)
char** s1;
char** s2;
{
return strcmp (*s1, *s2);
}
static int
strsortcasecmp (s1, s2)
char** s1;
char** s2;
{
return strcasecmp (*s1, *s2);
}
/* Return the full key matching <key>
*
* We can't use the C library bsearch function here because
* it doesn't allow us to pass a third length parameter
* to the comparison function.
*/
static Key_t
fuzzdict_bsearch (self, key)
FuzzDict self;
Key_t key;
{
int low, high, mid;
int len, rslt;
/* Get sorted list of actual keys, if required */
if (!self->keylistValid) {
if (self->keylist)
free (self->keylist);
self->keylist = fuzzdict_keys (self);
qsort ((char*)self->keylist, self->nitems,
sizeof (Key_t), self->sort);
self->keylistValid = 1;
}
/* Perform the binary search: classical algorithm */
low = 0; high = self->nitems;
len = strlen (key);
while (low <= high) {
Key_t* currp;
mid = (low+high)/2;
currp = &self->keylist[mid];
rslt = self->compare (key, *currp, len);
if (rslt < 0)
high = mid - 1;
else if (rslt > 0)
low = mid + 1;
else {
/* Okay, we now have a match, at least to the
* significance of the argument key.
* Now see if the same key would match adjacent
* entries.
*/
if ((mid > 0 && !self->compare (key, currp[-1], len))
|| (mid < (self->nitems-1)
&& !self->compare (key, currp[1], len))) {
self->error = "key not unique";
return 0;
}
return *currp;
}
}
return 0;
}
#ifdef TEST
#include <stdio.h>
main (argc, argv)
int argc;
char** argv;
{
FuzzDict aft;
int i;
FILE* file;
char* rslt;
char buf[512];
char *vec[12];
int lineno;
aft = fuzzdict_new();
if (argc == 1)
die ("Have to specify a file for symbols to install");
for (argc--,argv++; argc > 0; argc--,argv++) {
((file = fopen (*argv, "r")) || die ("Can't open '%s'", *argv));
printf ("Adding tokens from '%s'\n", *argv);
lineno = 1;
while (cfgets (buf, sizeof buf, file, &lineno)) {
i = strsplit (buf, vec, 12, 0);
if (i == 0)
continue;
fuzzdict_add(aft, vec[0], strdup (vec[0]));
}
fclose (file);
}
printf ("enter keys to look up -- EOF to quit\n");
while (fgets (buf, sizeof buf, stdin)) {
i = strsplit (buf, vec, 12, 0);
if (i == 0)
continue;
rslt = fuzzdict_find(aft, vec[0]);
if (rslt)
printf ("lookup on '%s' yields '%s'\n",
vec[0], rslt);
else
printf ("lookup on '%s' yields NIL: '%s'\n",
vec[0], aft->error);
}
printf ("\n");
}
die (s, a1, a2, a3)
char* s;
int a1, a2, a3;
{
fprintf (stderr, "die: ");
fprintf (stderr, s, a1, a2, a3);
putc ('\n', stderr);
fflush (stderr);
exit (1);
}
#endif /* TEST */
syntax highlighted by Code2HTML, v. 0.9.1