// CRM114 Regex redirection bounce package this file bounces
// CRM114 regex requests to whichever regex package has been
// compiled and linked in to CRM114.
//
// Adding a new regex package is relatively easy- just mimic the
// ifdef stanzas below to map the functions
//
// crm_regcomp
// crm_regexec
// crm_regerror
// crm_regfree
// crm_regversion
//
// into whatever calls your preferred regex package uses.
//
#include "crm114_sysincludes.h"
char * tre_version (void);
//
// include any local crm114 configuration file
#include "crm114_config.h"
// include the crm114 data structures file
#include "crm114_structs.h"
// and include the routine declarations file
#include "crm114.h"
// Cache for regex compilations
typedef struct {
char *regex;
regex_t *preg; // ptr to struct of {long, void*}
long regex_len;
int cflags;
int status;
} REGEX_CACHE_BLOCK;
#if CRM_REGEX_CACHESIZE > 0
REGEX_CACHE_BLOCK regex_cache[CRM_REGEX_CACHESIZE]
= { { NULL, NULL, 0, 0, 0} } ;
#endif
//
// How to do a register compilation
//
int crm_regcomp (regex_t *preg, char *regex, long regex_len, int cflags)
{
// compile it with the TRE regex compiler
//
// bug workaround - many regex compilers don't compile the null
// regex correctly, but _do_ compile "()" correctly, which
// matches the same thing).
if (regex_len == 0)
{
return (regncomp (preg, "()", 2, cflags));
};
// Are we cacheing compiled regexes? Maybe not...
#if CRM_REGEX_CACHESIZE == 0
if (internal_trace)
{
int i;
fprintf (stderr, "\ncompiling regex '%s', len %ld, in hex: ",
regex, regex_len);
for (i = 0; i < regex_len; i++)
{
fprintf (stderr, "%2X", regex[i]);
};
fprintf (stderr, "\n");
}
return ( regncomp (preg, regex, regex_len, cflags));
#else
// We are cacheing. Scan our cache set for the compiled versions
// of this regex. Note that a length of 0 means "empty bucket".
{
int i, j, found_it;
long rtsize = sizeof (regex_t);
regex_t *ppreg_temp = NULL;
char *regex_temp = NULL;
long rlen_temp = 0;
int cflags_temp = 0;
int status_temp = 0;
if (internal_trace) fprintf (stderr, "Checking the regex cache for %s\n",
regex);
j = 0;
#ifdef REGEX_CACHE_LINEAR_SEARCH
//
// Linear Search uses a strict LRU algorithm to cache
// the precompiled regexes.
//
found_it = 0;
i = -1;
while (!found_it && i < CRM_REGEX_CACHESIZE)
{
i++;
if (regex_len == regex_cache[i].regex_len
&& cflags == regex_cache[i].cflags
&& strncmp (regex_cache[i].regex, regex, regex_len) == 0)
{
// We Found It! Put it into the _temp vars...
if (internal_trace) fprintf (stderr, "found it.\n");
ppreg_temp = regex_cache[i].preg;
regex_temp = regex_cache[i].regex;
rlen_temp = regex_len;
cflags_temp = cflags;
status_temp = regex_cache[i].status;
found_it = i;
};
};
#endif
#ifdef REGEX_CACHE_RANDOM_ACCESS
//
// Random Access uses an associative cache based on
// the hash of the regex (mod the size of the cache).
//
found_it = 0;
i = strnhash (regex, regex_len) % CRM_REGEX_CACHESIZE;
if (regex_len == regex_cache[i].regex_len
&& cflags == regex_cache[i].cflags
&& strncmp (regex_cache[i].regex, regex, regex_len) == 0)
{
// We Found It! Put it into the _temp vars...
if (internal_trace) fprintf (stderr, "found it.\n");
ppreg_temp = regex_cache[i].preg;
regex_temp = regex_cache[i].regex;
rlen_temp = regex_len;
cflags_temp = cflags;
status_temp = regex_cache[i].status;
found_it = i;
};
#endif
// note that on exit, i now is the index where we EITHER found
// the good data, or failed to do so, and found_it tells us which.
//
if ( ! (found_it))
{
// We didn't find it. Do the compilation instead, putting
// the results into the _temp vars.
if (internal_trace) fprintf (stderr, "couldn't find it\n");
regex_temp = (char *) malloc (regex_len + 1);
memcpy (regex_temp, regex, regex_len);
rlen_temp = regex_len;
cflags_temp = cflags;
if (internal_trace)
fprintf (stderr, "Compiling %s (len %ld).\n", regex_temp, rlen_temp);
ppreg_temp = (regex_t *) malloc (rtsize);
if (ppreg_temp == NULL)
fatalerror ("Unable to allocate a pattern register buffer header. ",
"This is hopeless. ");
status_temp =
regncomp (ppreg_temp, regex_temp, rlen_temp, cflags_temp);
// We will always stuff the _temps in at 0
// and pretend that this was at the last index, so it
// moves everything else further down the list.
i = CRM_REGEX_CACHESIZE - 1;
};
// Either way, at this point, the _temp vars contain the new and
// correct regex information; this information has vacated the slot
// at index i,
#ifdef REGEX_CACHE_LINEAR_SEARCH
// If we're in linear search, we move 0 through i-1 down to 1
// through i and then we stuff the _temp vars into the [i] cache
// area. Note that if it was the final slot (at
// CRM_REGEX_CACHESIZE), we have to free the resources up or
// we'll leak them.
//
// Free the resources first, if needed.
//
if (i == CRM_REGEX_CACHESIZE - 1)
{
if (regex_cache[i].preg != NULL)
{
regfree (regex_cache[i].preg);
free (regex_cache[i].preg);
};
if (regex_cache[i].regex != NULL) free (regex_cache[i].regex);
regex_cache[i].regex = NULL;
regex_cache[i].regex_len = 0;
};
// If needed, slide 0 through i-1 down to 1..i, to make room
// at [0]
//
if (i != 0)
{
for (j = i; j > 0; j--)
{
regex_cache[j].preg = regex_cache[j-1].preg;
regex_cache[j].regex = regex_cache[j-1].regex;
regex_cache[j].regex_len = regex_cache[j-1].regex_len;
regex_cache[j].cflags = regex_cache[j-1].cflags;
regex_cache[j].status = regex_cache[j-1].status;
};
};
// and always stuff the _temps (which are correct) in at [0]
regex_cache[0].preg = ppreg_temp;
regex_cache[0].regex = regex_temp;
regex_cache[0].regex_len = rlen_temp;
regex_cache[0].status = status_temp;
regex_cache[0].cflags = cflags_temp;
#endif
#ifdef REGEX_CACHE_RANDOM_ACCESS
//
// In a random access system, we just overwrite the single
// slot that we expected our regex to be in...
// Free the resources first, if needed.
//
if (! found_it)
{
if (regex_cache[i].preg != NULL)
{
regfree (regex_cache[i].preg);
free (regex_cache[i].preg);
};
if (regex_cache[i].regex != NULL) free (regex_cache[i].regex);
regex_cache[i].regex = NULL;
regex_cache[i].regex_len = 0;
};
// and stuff the _temps (which are correct) in at [i]
regex_cache[i].preg = ppreg_temp;
regex_cache[i].regex = regex_temp;
regex_cache[i].regex_len = rlen_temp;
regex_cache[i].status = status_temp;
regex_cache[i].cflags = cflags_temp;
#endif
// Just about done. Set up the return preg..
if (internal_trace)
fprintf (stderr, " About to return\n");
memcpy (preg, ppreg_temp, rtsize);
return (regex_cache[i].status);
};
#endif
}
//
//
// How to do a regex execution from the compiled register
//
int crm_regexec ( regex_t *preg, char *string, long string_len,
size_t nmatch, regmatch_t pmatch[], int eflags,
char *aux_string)
{
if (!string)
{
nonfatalerror("crm_regexec - Regular Expression Execution Problem:\n",
"NULL pointer to the string to match .");
return (REG_NOMATCH);
};
if (aux_string == NULL
|| strlen (aux_string) < 1)
{
return (regnexec (preg, string, string_len, nmatch, pmatch, eflags));
}
else
{
int i;
// parse out the aux string for approximation parameters
regamatch_t mblock;
regaparams_t pblock;
mblock.nmatch = nmatch;
mblock.pmatch = pmatch;
sscanf (aux_string, "%d %d %d %d",
&pblock.cost_subst,
&pblock.cost_ins,
&pblock.max_cost,
&pblock.cost_del);
if (user_trace)
fprintf (stderr,
"Using approximate match. Costs: Subst %d Ins %d Max %d Del %d \n",
pblock.cost_subst,
pblock.cost_ins,
pblock.max_cost,
pblock.cost_del);
// now we can run the actual match
i = reganexec (preg, string, string_len, &mblock, pblock, eflags);
if (user_trace)
fprintf (stderr, "approximate Regex match returned %d .\n", i);
return (i);
};
}
size_t crm_regerror (int errorcode, regex_t *preg, char *errbuf,
size_t errbuf_size)
{
return (regerror (errorcode, preg, errbuf, errbuf_size));
};
void crm_regfree (regex_t *preg)
{
#if CRM_REGEX_CACHESIZE > 0
// nothing! yes indeed, if we are using cacheing, we don't free
// till and unless we decache, so crm_regfree is a noop.
return;
#else
return (regfree (preg));
#endif
};
char * crm_regversion ()
{
static char vs[129];
strcat (vs, (char *) tre_version ());
return (vs);
};
syntax highlighted by Code2HTML, v. 0.9.1