// crm_str_funcs.c - Controllable Regex Mutilator, version v1.0
// Copyright 2001-2006 William S. Yerazunis, all rights reserved.
//
// This software is licensed to the public under the Free Software
// Foundation's GNU GPL, version 2. You may obtain a copy of the
// GPL by visiting the Free Software Foundations web site at
// www.fsf.org, and a copy is included in this distribution.
//
// Other licenses may be negotiated; contact the
// author for details.
//
// include some standard files
#include "crm114_sysincludes.h"
// include any local crm114 configuration file
#include "crm114_config.h"
// include the crm114 data structures file
#include "crm114_structs.h"
// and include the routine declarations file
#include "crm114.h"
// the command line argc, argv
extern int prog_argc;
extern char **prog_argv;
// the auxiliary input buffer (for WINDOW input)
extern char *newinputbuf;
// the globals used when we need a big buffer - allocated once, used
// wherever needed. These are sized to the same size as the data window.
extern char *inbuf;
extern char *outbuf;
extern char *tempbuf;
// strnhash - generate the hash of a string of length N
// goals - fast, works well with short vars including
// letter pairs and palindromes, not crypto strong, generates
// hashes that tend toward relative primality against common
// hash table lengths (so taking the output of this function
// modulo the hash table length gives a relatively uniform distribution)
//
// In timing tests, this hash function can hash over 10 megabytes
// per second (using as text the full 2.4.9 linux kernel source)
// hashing individual whitespace-delimited tokens, on a Transmeta
// 666 MHz.
/***** OLD VERSION NOT 64-BIT PORTABLE DON'T USE ME *********
long strnhash (char *str, long len)
{
long i;
long hval;
char *hstr;
char chtmp;
// initialize hval
hval= len;
hstr = (char *) &hval;
// for each character in the incoming text:
for ( i = 0; i < len; i++)
{
// xor in the current byte against each byte of hval
// (which alone gaurantees that every bit of input will have
// an effect on the output)
//hstr[0] = (hstr[0] & ( ~ str[i] ) ) | ((~ hstr [0]) & str[i]);
//hstr[1] = (hstr[1] & ( ~ str[i] ) ) | ((~ hstr [1]) & str[i]);
//hstr[2] = (hstr[2] & ( ~ str[i] ) ) | ((~ hstr [2]) & str[i]);
//hstr[3] = (hstr[3] & ( ~ str[i] ) ) | ((~ hstr [3]) & str[i]);
hstr[0] ^= str[i];
hstr[1] ^= str[i];
hstr[2] ^= str[i];
hstr[3] ^= str[i];
// add some bits out of the middle as low order bits.
hval = hval + (( hval >> 12) & 0x0000ffff) ;
// swap bytes 0 with 3
chtmp = hstr [0];
hstr[0] = hstr[3];
hstr [3] = chtmp;
// rotate hval 3 bits to the left (thereby making the
// 3rd msb of the above mess the hsb of the output hash)
hval = (hval << 3 ) + (hval >> 29);
}
return (hval);
}
****/
// strnhash - hash the first len bytes of str.
//
// This is the portable form of the hash (originally by Fidelis): the
// state is exactly 32 bits wide regardless of the width of "long".
//
// All arithmetic is done on an unsigned 32-bit value so that no step
// depends on signed overflow or on shifting a negative number.  The
// earlier signed formulation relied on two things that are kept here
// on purpose, so that hash values do not change:
//
//   - the "rotate" step adds (hval >> 29) computed as an ARITHMETIC
//     shift, i.e. the three top bits sign-extended;
//   - the final 32-bit value is returned SIGN-EXTENDED to unsigned long
//     (visible only where unsigned long is wider than 32 bits).
//
// NOTE(review): because of that sign extension the returned value can
// differ between 32-bit and 64-bit unsigned long when bit 31 is set;
// this is inherited behavior, not changed here.
//
//   str - bytes to hash (not modified; need not be NUL terminated)
//   len - number of bytes to hash; if len <= 0 no bytes are read
//
// Returns the hash value.
unsigned long strnhash (char *str, long len)
{
  long i;
  uint32_t hval;
  uint32_t tmp;

  // initialize hval from the length (only its low 32 bits take part)
  hval = (uint32_t) len;

  // for each character in the incoming text:
  for (i = 0; i < len; i++)
    {
      // xor the current byte into each of the four bytes of hval
      // (which alone guarantees that every bit of input will have
      // an effect on the output)
      tmp = (uint32_t) ((unsigned char) str[i]);
      tmp = tmp | (tmp << 8) | (tmp << 16) | (tmp << 24);
      hval ^= tmp;

      // add some bits out of the middle as low order bits.
      hval = hval + ((hval >> 12) & 0x0000ffff);

      // swap the most and least significant bytes
      tmp = (hval << 24) | (hval >> 24);
      hval &= 0x00ffff00;
      hval |= tmp;

      // "rotate" hval 3 bits to the left.  The low-order term is the
      // sign-extended top three bits (arithmetic shift), exactly as the
      // signed formulation computed it.
      tmp = hval >> 29;
      if (hval & 0x80000000u)
        tmp |= 0xfffffff8u;
      hval = (hval << 3) + tmp;
    }

  // Return the 32-bit state sign-extended to unsigned long.
  if (hval & 0x80000000u)
    return ((unsigned long) hval | ~ (unsigned long) 0xffffffffUL);
  return ((unsigned long) hval);
}
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
//
// Cached mmap stuff. Adapted from Win32 compatibility code from
// Barry Jaspan. Altered to not reveal the difference between a
// mapped file pointer and one of Barry's 'map' structs. In this
// code (unlike Barry's patches), all that is ever seen are
// pointers to memory (i.e. crm_mmap and crm_munmap have the same
// API and semantics as with the libc mmap() and munmap() calls),
// no structs are ever seen by the callers of this code.
//
// Bugs in the POSIX code are my fault. Bugs in the WIN32 code are
// either mine or his. So there.
//
///////////////////////////////////////////////////////////////////////////
//
// This code section (from this line to the line below that states
// that it is the end of the dual-licensed code section) is
// copyright and owned by William S. Yerazunis. In return for
// addition of significant derivative work, Barry Jaspan is hereby
// granted a full unlimited license to use this code section,
// including license to relicense under other licenses.
//
////////////////////////////////////////////////////////////////////////////
// An mmap cell - one node of the doubly linked list that forms the
// mmap cache.  Each cell records one mapping together with the
// parameters it was requested with.
//
typedef struct prototype_crm_mmap_cell
{
  char *name;           // file name the mapping was requested for
  long start;           // requested starting offset
  long requested_len;   // length as requested by the caller
  long actual_len;      // length actually mapped
  void *addr;           // address of the mapping
  long prot;            // prot flags to be used, in the mmap() form
                        // that is, PROT_*, rather than O_*
  long mode;            // Mode is things like MAP_SHARED or MAP_LOCKED
  int unmap_count;      // counter - unmap this after UNMAP_COUNT_MAX
  struct prototype_crm_mmap_cell *next;   // following cell, or NULL
  struct prototype_crm_mmap_cell *prev;   // preceding cell, or NULL
#ifdef POSIX
  int fd;               // descriptor of the mapped file
#endif
#ifdef WIN32
  HANDLE fd;            // handle of the mapped file
  HANDLE mapping;       // handle of the file-mapping object
#endif
} CRM_MMAP_CELL;

// Head of the cache list.  We want this to hang around but not be
// visible outside this file.
static CRM_MMAP_CELL *cache = NULL;
//////////////////////////////////////
//
//     Force an unmap (don't look at the unmap_count, just do it)
//     Watch out tho- this takes a CRM_MMAP_CELL, not a *ptr, so don't
//     call it from anywhere except inside this file.
//
//     The mapping is flushed (if writable), unmapped, and the file
//     descriptor / handles held by the cell are closed.  The cell
//     itself is neither unlinked from the cache nor freed here.
//
void crm_unmap_file_internal ( CRM_MMAP_CELL *map)
{
#ifdef POSIX
  if (map->prot & PROT_WRITE)
    msync (map->addr, map->actual_len, MS_ASYNC | MS_INVALIDATE);
  munmap (map->addr, map->actual_len);

  //    Because mmap/munmap doesn't set atime, nor set the "modified"
  //    flag, some network filesystems will fail to mark the file as
  //    modified and so their cacheing will make a mistake.
  //
  //    The fix is that for files that were mmapped writably, to do
  //    a trivial read/write on the mapped file, to force the
  //    filesystem to repropagate its caches.
  //
  //    Only the bytes that were really read are written back: if the
  //    read fails or the file is shorter than the header struct we must
  //    not write uninitialized memory into the file.
  //
  if (map->prot & PROT_WRITE)
    {
      FEATURE_HEADER_STRUCT foo;
      long got;

      if (lseek (map->fd, 0, SEEK_SET) == 0)
        {
          got = (long) read (map->fd, &foo, sizeof (foo));
          if (got > 0 && lseek (map->fd, 0, SEEK_SET) == 0)
            {
              // best effort only - a failure here is not fatal.
              if (write (map->fd, &foo, (size_t) got) != got
                  && internal_trace)
                fprintf (stderr,
                         "Couldn't rewrite the start of an unmapped file.\n");
            }
        }
    }

  //    Although the docs say we can close the fd right after mmap,
  //    while leaving the mmap outstanding even though the fd is closed,
  //    actual testing versus several kernels shows this leads to
  //    broken behavior.  So, we close here instead.
  //
  close (map->fd);
#endif
#ifdef WIN32
  FlushViewOfFile (map->addr, 0);
  UnmapViewOfFile (map->addr);
  CloseHandle (map->mapping);
  CloseHandle (map->fd);
#endif
}
/////////////////////////////////////////////////////
//
//     Hard-unmap by filename.  Do this ONLY if you
//     have changed the file by some means outside of
//     the mmap system (i.e. by writing via fopen/fwrite/fclose).
//
//     Every cached mapping whose name equals filename is force-unmapped.
//
void crm_force_munmap_filename (char *filename)
{
  CRM_MMAP_CELL *p;
  CRM_MMAP_CELL *successor;

  //    Walk the whole cache list.  Forcing an unmap frees the cell, so
  //    the link to the following cell has to be picked up BEFORE the
  //    call - reading p->next afterwards would touch freed memory.
  for (p = cache; p != NULL; p = successor)
    {
      successor = p->next;
      if (strcmp (p->name, filename) == 0)
        {
          //   found it... force an munmap.
          crm_force_munmap_addr (p->addr);
        }
    }
}
//////////////////////////////////////////////////////
//
//     Hard-unmap by address.  Do this ONLY if you
//     have changed the file by some means outside of
//     the mmap system (i.e. by writing via fopen/fwrite/fclose).
//
//     addr must be an address previously handed out by the mmap cache;
//     otherwise a nonfatal error is raised and nothing is unmapped.
//
void crm_force_munmap_addr (void *addr)
{
  CRM_MMAP_CELL *cell;

  //    Look the address up in the cache.
  for (cell = cache; cell != NULL; cell = cell->next)
    if (cell->addr == addr)
      break;

  if (cell == NULL)
    {
      nonfatalerror ("Internal fault - this code has tried to force unmap memory "
                     "that it never mapped in the first place. ",
                     "Please file a bug report. ");
      return;
    }

  //    Push the counter past the limit so that the ordinary unmap path
  //    really releases the mapping, then let it do the work.
  cell->unmap_count = UNMAP_COUNT_MAX + 1;
  crm_munmap_file (addr);
}
//////////////////////////////////////////////////////
//
//      This is the wrapper around the "traditional" file unmap, but
//      does cacheing.  It keeps count of unmappings and only unmaps
//      when it needs to.
//

//      Take a cell out of the cache list and release its storage.
//      The mapping and file handles must already have been dealt with.
static void crm_mmap_cache_remove (CRM_MMAP_CELL *cell)
{
  if (cell->prev == NULL)
    cache = cell->next;
  else
    cell->prev->next = cell->next;
  if (cell->next != NULL)
    cell->next->prev = cell->prev;
  free (cell->name);
  free (cell);
}

//      Release one reference to the mapping at addr.  Once the cell's
//      unmap counter exceeds UNMAP_COUNT_MAX the file is really
//      unmapped and the cell dropped; until then a writable mapping is
//      merely flushed.
void crm_munmap_file (void *addr)
{
  CRM_MMAP_CELL *cell;

  //     step 1 - find the cell that owns this address
  for (cell = cache; cell != NULL && cell->addr != addr; cell = cell->next)
    ;

  if (cell == NULL)
    {
      nonfatalerror ("Internal fault - this code has tried to unmap memory "
                     "that it never mapped in the first place. ",
                     "Please file a bug report. ");
      return;
    }

  //     step 2 - count this unmap; past the limit, unmap for real
  cell->unmap_count += 1;
  if (cell->unmap_count > UNMAP_COUNT_MAX)
    {
      crm_unmap_file_internal (cell);
      //   File now unmapped, take the cell out of the cache as well.
      crm_mmap_cache_remove (cell);
      return;
    }

  //     still cached: nothing to do for mappings that can't be dirty
  if ( ! (cell->prot & PROT_WRITE))
    return;

#ifdef POSIX
  msync (cell->addr, cell->actual_len, MS_ASYNC | MS_INVALIDATE);
#endif
#ifdef WIN32
  {
    DWORD view_access;

    if (cell->mode & MAP_PRIVATE)
      view_access = FILE_MAP_COPY;
    else if (cell->prot & PROT_WRITE)
      view_access = FILE_MAP_WRITE;
    else
      view_access = FILE_MAP_READ;

    //  unmap our view of the file, which will lazily write any
    //  changes back to the file
    UnmapViewOfFile (cell->addr);
    //  and remap so we still have it open
    cell->addr = MapViewOfFile (cell->mapping, view_access, 0, 0, 0);
    //  if the remap failed for some reason, just free everything
    //  and get rid of this cached mmap entry.
    if (cell->addr == NULL)
      {
        CloseHandle (cell->mapping);
        CloseHandle (cell->fd);
        crm_mmap_cache_remove (cell);
      }
  }
#endif
}
/////////////////////////////////////////////////////////
//
// Force an Unmap on every mmapped memory area we know about
void crm_munmap_all()
{
while (cache != NULL) {
cache->unmap_count = UNMAP_COUNT_MAX + 1;
crm_munmap_file (cache->addr);
}
}
//////////////////////////////////////////////////////////
//
//     MMap a file in (or get the map from the cache, if possible)
//     (length is how many bytes to get mapped, remember!)
//
//     prot flags are in the mmap() format - that is, PROT_, not O_ like open.
//     (it would be nice if length could be self-generated...)
//
//     filename      - file to map
//     start         - offset at which the mapping starts
//     requested_len - bytes to map; a negative value means "from start
//                     to the end of the file" (the file is stat()ed)
//     prot, mode    - as for mmap()
//     actual_len    - if non-NULL, receives the mapped length, or 0 on
//                     failure
//
//     Returns the address of the mapping, or MAP_FAILED on any failure
//     (on every platform).
//

//     Common failure exit: release a half-built cell (if any), report
//     a zero length to the caller and hand back MAP_FAILED.  Any file
//     descriptor or handle must be closed by the caller beforehand.
static void *crm_mmap_file_fail (CRM_MMAP_CELL *cell, long *actual_len)
{
  if (cell != NULL)
    {
      free (cell->name);
      free (cell);
    }
  if (actual_len)
    *actual_len = 0;
  return MAP_FAILED;
}

void *crm_mmap_file (char *filename,
                     long start, long requested_len, long prot, long mode,
                     long *actual_len)
{
  CRM_MMAP_CELL *p;
  struct stat statbuf;
#ifdef POSIX
  int open_flags;
#endif
#ifdef WIN32
  long pagesize = 0;
  DWORD open_flags = 0;
  DWORD createmap_flags = 0;
  DWORD openmap_flags = 0;
#endif

  //    Search for the file - if it's already mmaped, just return it.
  for (p = cache; p != NULL; p = p->next)
    {
      if (strcmp (p->name, filename) == 0
          && p->prot == prot
          && p->mode == mode
          && p->start == start
          && p->requested_len == requested_len)
        {
          if (actual_len)
            *actual_len = p->actual_len;
          return (p->addr);
        }
    }

  //    No luck - we couldn't find the matching file/start/len/prot/mode
  //    We need to add an mmap cache cell, and mmap the file.
  //
  p = malloc (sizeof (CRM_MMAP_CELL));
  if (p == NULL)
    {
      untrappableerror(" Unable to malloc enough memory for mmap cache. ",
                       " This is unrecoverable. Sorry.");
      return MAP_FAILED;
    }
  p->name = strdup (filename);
  if (p->name == NULL)
    {
      //   the name is compared with strcmp on every cache search, so a
      //   cell without one must never reach the list.
      free (p);
      untrappableerror(" Unable to malloc enough memory for mmap cache. ",
                       " This is unrecoverable. Sorry.");
      return MAP_FAILED;
    }
  p->start = start;
  p->requested_len = requested_len;
  p->prot = prot;
  p->mode = mode;

#ifdef POSIX
  //    translate the PROT_ flags into the matching open() access mode
  open_flags = O_RDWR;
  if ( ! (p->prot & PROT_WRITE) && (p->prot & PROT_READ) )
    open_flags = O_RDONLY;
  if ( (p->prot & PROT_WRITE) && !(p->prot & PROT_READ))
    open_flags = O_WRONLY;
  if (internal_trace)
    fprintf (stderr, "MMAP file open mode: %ld\n", (long) open_flags);

  //   if we need to, we stat the file
  if (p->requested_len < 0)
    {
      if (stat (p->name, &statbuf) != 0)
        return crm_mmap_file_fail (p, actual_len);
    }

  if (user_trace)
    fprintf (stderr, "MMAPping file %s for direct memory access.\n", filename);
  p->fd = open (filename, open_flags);
  if (p->fd < 0)
    return crm_mmap_file_fail (p, actual_len);   // nothing to close

  //   If we didn't get a length, fill in the max possible length via statbuf
  p->actual_len = p->requested_len;
  if (p->actual_len < 0)
    p->actual_len = statbuf.st_size - p->start;

  p->addr = mmap (NULL,
                  p->actual_len,
                  p->prot,
                  p->mode,
                  p->fd,
                  p->start);
  //     we can't close the fd now (the docs say yes, testing says no,
  //     we need to wait till we're really done with the mmap.)
  if (p->addr == MAP_FAILED)
    {
      close (p->fd);
      return crm_mmap_file_fail (p, actual_len);
    }
#endif

#ifdef WIN32
  if (p->mode & MAP_PRIVATE)
    {
      open_flags = GENERIC_READ;
      createmap_flags = PAGE_WRITECOPY;
      openmap_flags = FILE_MAP_COPY;
    }
  else
    {
      if (p->prot & PROT_WRITE)
        {
          open_flags = GENERIC_WRITE;
          createmap_flags = PAGE_READWRITE;
          openmap_flags = FILE_MAP_WRITE;
        }
      if (p->prot & PROT_READ)
        {
          open_flags |= GENERIC_READ;
          if (!(p->prot & PROT_WRITE))
            {
              createmap_flags = PAGE_READONLY;
              openmap_flags = FILE_MAP_READ;
            }
        }
    }
  if (internal_trace)
    fprintf (stderr, "MMAP file open mode: %ld\n", (long) open_flags);

  //  If we need to, we stat the file.
  if (p->requested_len < 0)
    {
      if (stat (p->name, &statbuf) != 0)
        return crm_mmap_file_fail (p, actual_len);
    }

  if (user_trace)
    fprintf (stderr, "MMAPping file %s for direct memory access.\n", filename);
  p->fd = CreateFile (filename, open_flags, 0,
                      NULL, OPEN_EXISTING, 0, NULL);
  if (p->fd == INVALID_HANDLE_VALUE)
    return crm_mmap_file_fail (p, actual_len);

  p->actual_len = p->requested_len;
  if (p->actual_len < 0)
    p->actual_len = statbuf.st_size - p->start;

  //  Size the mapping object with the resolved length: the raw
  //  requested_len may be negative ("whole file"), which as a DWORD
  //  would ask for a mapping of nearly 4 GB.
  //  NOTE(review): the view below is always taken from offset 0, so a
  //  non-zero start is not honored on this platform - confirm callers.
  p->mapping = CreateFileMapping (p->fd,
                                  NULL,
                                  createmap_flags, 0, (DWORD) p->actual_len,
                                  NULL);
  if (p->mapping == NULL)
    {
      CloseHandle (p->fd);
      return crm_mmap_file_fail (p, actual_len);
    }
  p->addr = MapViewOfFile (p->mapping, openmap_flags, 0, 0, 0);
  if (p->addr == NULL)
    {
      CloseHandle (p->mapping);
      CloseHandle (p->fd);
      return crm_mmap_file_fail (p, actual_len);
    }

  {
    SYSTEM_INFO info;
    GetSystemInfo (&info);
    pagesize = info.dwPageSize;
  }

  //  Jaspan-san says force-loading every page is a good thing
  //  under Windows.  I know it's a bad thing under Linux,
  //  so we'll only do it under Windows.
  if (pagesize > 0)
    {
      volatile char one_byte;   // volatile so the reads are not elided
      char *addr = (char *) p->addr;
      long i;
      for (i = 0; i < p->actual_len; i += pagesize)
        one_byte = addr[i];
      (void) one_byte;
    }
#endif

  //   If the caller asked for the length to be passed back, pass it.
  if (actual_len)
    *actual_len = p->actual_len;

  //   Now, insert this fresh mmap at the head of the cache list
  //
  p->unmap_count = 0;
  p->prev = NULL;
  p->next = cache;
  if (cache != NULL)
    cache->prev = p;
  cache = p;
  return p->addr;
}
#ifdef WIN32
//   times() stand-in for WIN32, built on GetProcessTimes().
//
//   Fills buf with the user and kernel times of the current process
//   (child times are always reported as zero) and returns the value of
//   GetTickCount(); returns -1 if the process times can't be obtained.
//
//   NOTE(review): only the low 32 bits (dwLowDateTime) of each FILETIME
//   are stored, so the reported times discard the high half - confirm
//   that callers only use short differences.
clock_t times(TMS_STRUCT *buf)
{
  FILETIME t_create, t_exit, t_kernel, t_user;

  if ( ! GetProcessTimes (GetCurrentProcess (),
                          &t_create, &t_exit, &t_kernel, &t_user))
    return -1;

  buf->tms_utime = t_user.dwLowDateTime;
  buf->tms_stime = t_kernel.dwLowDateTime;
  buf->tms_cutime = 0;
  buf->tms_cstime = 0;
  return GetTickCount ();
}
#endif
///////////////////////////////////////////////////////////////////////
//
// End of section of code dual-licensed to Yerazunis and Jaspan
//
///////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////
//
// strntrn - translate characters of a string.
//
// Original spec by Bill Yerazunis, original code by Raul Miller,
// recode for CRM114 use by Bill Yerazunis.
//
// This code section (crm_strntrn and subsidiary routines) is
// dual-licensed to both William S. Yerazunis and Raul Miller,
// including the right to reuse this code in any way desired,
// including the right to relicense it under any other terms as
// desired.
//
//////////////////////////////////////////////////////////////////////
//
// We start out with two helper routines - one to invert a string,
// and the other to expand string ranges.
//
//////////////////////////////////////////////////////////////////////
//
//   Given a string of characters, invert it - that is, the string
//   that was originally 0x00 to 0xFF but with all characters that
//   were in the incoming string omitted and the string repacked.
//
//     str  - the characters to leave out (not modified)
//     len  - number of characters in str
//     rlen - receives the length of the returned string
//
//   Returns a pointer to the fresh inversion (in ascending character
//   order, NOT NUL terminated), or NULL (on error)
//
//   The old string is unharmed.  Be careful of it.
//
//   REMEMBER TO FREE() THE RESULT OR ELSE YOU WILL LEAK MEMORY!!!
unsigned char * crm_strntrn_invert_string (unsigned char *str,
                                           long len,
                                           long *rlen)
{
  unsigned char *outstr;
  long i, j;

  //  create our output string space.  It will never be more than 256
  //  characters.  It might be less.  But we don't care.
  outstr = malloc (256);

  //  error out if there's a problem with MALLOC; never fall through
  //  to the code below with a NULL buffer.
  if (!outstr)
    {
      untrappableerror(
           "Can't allocate memory to invert strings for strstrn", "");
      return (NULL);
    }

  //  The string of all characters is the inverse of "" (the empty
  //  string), so a mainline string of "^" inverts here to the string
  //  of all characters from 0x00 to 0xff.
  //
  //  The string "^" (equivalent to total overall string "^^") is the
  //  string of all characters *except* ^; the mainline code suffices
  //  for that situation as well.
  //
  //  BUT THEN how does one specify the string of a single "^"?  Well,
  //  it's NOT of NOT of "NOT" ("^"), so "^^^" in the original, or
  //  "^^" here, is taken as just a literal "^" (one caret character).
  //
  if (len == 2 && str[0] == '^' && str[1] == '^')
    {
      outstr[0] = '^';
      *rlen = 1;
      return (outstr);
    }

  //  No such luck.  Use outstr first as a presence map:
  //  1 :== "character present in the result".
  memset (outstr, 1, 256);

  //  for each character present in the input string, zero its map slot.
  for (i = 0; i < len; i++)
    outstr [ str [i]] = 0;

  //  Compact the map into the result in place.  The write index j can
  //  never run ahead of the read index i, so no unread map slot is
  //  ever overwritten.
  for (i = 0, j = 0; i < 256; i++)
    if (outstr[i])
      {
        outstr[j] = (unsigned char) i;
        j++;
      }

  //  The final string length is j characters long, in outstr.
  //  Don't forget to free() it later.  :-)
  *rlen = j;
  return (outstr);
}
//   expand those hyphenated string ranges - input is str, of length len.
//   We return the new string, and the new length in rlen.
//
//   A '-' that is neither the first nor the last character, and that is
//   reached by the scan, denotes the range from the character before it
//   to the character after it (ascending or descending, both ends
//   included).  The character after the '-' is consumed as the end of
//   the range; scanning resumes with the character following it.
//
//     str  - the string to expand (not modified)
//     len  - number of characters in str (negative is treated as 0)
//     rlen - receives the exact length of the returned string
//
//   Returns a freshly malloc()ed string (NOT NUL terminated) that the
//   caller must free(), or NULL if memory could not be obtained.
//
unsigned char * crm_strntrn_expand_hyphens(unsigned char *str,
                                           long len,
                                           long *rlen)
{
  long j, k;
  long need;
  unsigned char *r;

  if (len < 0)
    len = 0;

  //  Pass 1: measure.  This walks the input EXACTLY as the expansion
  //  pass below does, so the buffer can be neither too small nor the
  //  reported length too large.  (Note that the result might be longer
  //  than 256 characters, if the user specified overlapping ranges.)
  //
  //  A range contributes |end - start| characters here, because its
  //  start character has already been counted as an ordinary character.
  need = 0;
  for (j = 0; j < len; j++)
    {
      if ('-' == str[j] && j > 0 && j < len - 1)
        {
          int m = str[j-1];
          int n = str[j+1];
          need += (m < n) ? (n - m) : (m - n);
          j++;                   //  the range end is consumed
        }
      else
        {
          need++;
        }
    }

  //  Get the string length for our expanded string
  *rlen = need;

  //  Get the space for our expanded string.
  r = malloc (1 + need);    /* 1 + to avoid empty problems */
  if (!r)
    {
      untrappableerror(
           "Can't allocate memory to expand hyphens for strstrn", "");
      *rlen = 0;
      return (NULL);
    }

  //  Pass 2: expand the string, from "str" into "r"
  for (j = 0, k = 0; j < len; j++)
    {
      //  are we in a hyphen expression?  Check edge conditions too!
      if ('-' == str[j] && j > 0 && j < len - 1)
        {
          int m = str[j-1];
          int n = str[j+1];
          //  is this an increasing or decreasing range?
          int delta = (m < n) ? 1 : -1;
          int c = m;

          //  emit everything after the start, up to and including the
          //  end (nothing at all when start == end).
          while (c != n)
            {
              c += delta;
              r[k++] = (unsigned char) c;
            }
          j++;                   //  skip the range end, already emitted
        }
      else
        {
          //   It's not a range, so we just move along.  Move along!
          r[k++] = str[j];
        }
    }

  //  k == need by construction; report what was really written.
  *rlen = k;
  return (r);
}
//   strntrn - translate a string, like tr() but more fun.
//   This new, improved version not only allows inverted ranges
//    like 9-0 --> 9876543210 but also negation of strings and literals
//
//   flag of CRM_UNIQUE means "uniquify the incoming string"
//
//   flag of CRM_LITERAL means "don't interpret the alteration string"
//   so "^" and "-" regain their literal meaning
//
//     datastr       - the data, modified in place
//     datastrlen    - in: length of the data; out: length afterwards
//     maxdatastrlen - not used by this routine
//     fromstr/len   - characters to translate (or delete / uniquify)
//     tostr/len     - characters to translate to; tostrlen == 0 selects
//                     deletion of the from-characters instead
//     flags         - CRM_UNIQUE and/or CRM_LITERAL
//
//   The modification is "in place", and datastrlen gets modified.
//   This routine returns a long >=0 strlen on success,
//   and a negative number on failure.

//   Build one expanded character set from a from/to specification:
//   hyphen ranges are expanded and a leading '^' inverts the set.
//   Returns a malloc()ed string (length in *outlen), or NULL on failure.
static unsigned char *strntrn_build_charset (unsigned char *spec,
                                             long speclen,
                                             long *outlen)
{
  unsigned char *expanded;
  unsigned char *inverted;
  long expandedlen;

  //  an empty spec has no first character to look at
  if (speclen <= 0)
    return crm_strntrn_expand_hyphens (spec, 0, outlen);

  if (spec[0] != '^')
    return crm_strntrn_expand_hyphens (spec, speclen, outlen);

  expanded = crm_strntrn_expand_hyphens (spec + 1, speclen - 1, &expandedlen);
  if (!expanded)
    return NULL;
  inverted = crm_strntrn_invert_string (expanded, expandedlen, outlen);
  free (expanded);              //  released on success AND on failure
  return inverted;
}

long strntrn (
              unsigned char *datastr,
              long *datastrlen,
              long maxdatastrlen,
              unsigned char *fromstr,
              long fromstrlen,
              unsigned char *tostr,
              long tostrlen,
              long flags)
{
  long len = *datastrlen;
  long flen = 0, tlen = 0;
  unsigned char map[256];
  unsigned char *from = NULL;
  unsigned char *to = NULL;
  long j, k, last;
  long result = -1;

  //  If tostrlen == 0, we're deleting, except if
  //  ALSO fromstrlen == 0, in which case we're possibly
  //  just uniquing or maybe not even that.
  //
  int replace = (tostrlen != 0);

  (void) maxdatastrlen;

  //  Minor optimization - with nothing to translate from or to there
  //  is nothing to do.  We can just return now.
  //
  if (tostrlen == 0 && fromstrlen == 0)
    {
      *datastrlen = len;
      return (len);
    }

  //  If CRM_LITERAL, the strings are used as they are, otherwise build
  //  the expanded from-string and to-string.
  //
  if (CRM_LITERAL & flags)
    {
      //  Literal mode; just copy the strings.  Always ask for at least
      //  one byte (malloc(0) may legitimately return NULL), check the
      //  allocations BEFORE writing, and copy with memcpy so embedded
      //  NUL characters survive.
      flen = fromstrlen;
      tlen = tostrlen;
      from = malloc (flen > 0 ? (size_t) flen : 1);
      to = malloc (tlen > 0 ? (size_t) tlen : 1);
      if (from == NULL || to == NULL)
        goto done;
      if (flen > 0)
        memcpy (from, fromstr, (size_t) flen);
      if (tlen > 0)
        memcpy (to, tostr, (size_t) tlen);
    }
  else
    {
      from = strntrn_build_charset (fromstr, fromstrlen, &flen);
      if (!from)
        goto done;
      to = strntrn_build_charset (tostr, tostrlen, &tlen);
      if (!to)
        goto done;
    }

  //  If we're in <unique> mode, squish out any duplicated
  //  characters in the input data first.  We can do this as an in-place
  //  scan of the input string, and we always do it if <unique> is
  //  specified.
  //
  if (CRM_UNIQUE & flags)
    {
      unsigned char unique_map [256];

      //  build the map of the uniqueable characters
      for (j = 0; j < 256; j++)
        unique_map[j] = 1;         // all characters are keepers at first...
      for (j = 0; j < flen; j++)
        unique_map[from[j]] = 0;   //  but some need to be uniqued.

      //  If the character has a 0 in the unique map,
      //  and it's the same as the prior character,
      //  don't copy it.  Just move along.
      for (j = 0, k = 0, last = -1; j < len; j++)
        {
          if (datastr[j] != last || unique_map[datastr[j]] )
            {
              last = datastr[k++] = datastr[j];
            }
        }
      len = k;
    }

  if (replace)
    {
      //  This is replacement mode (not deletion mode) so we need
      //  to build the character map.  We
      //  initialize the map as each character maps to itself.
      //
      for (j = 0; j < 256; j++)
        map[j] = (unsigned char) j;

      //  go through and mod each character in the from-string to
      //  map into the corresponding character in the to-string
      //  (and start over in to-string if we run out)
      //
      for (j = 0, k = 0; j < flen; j++)
        {
          map[from[j]] = to[k];
          k++;
          if (k >= tlen)
            k = 0;
        }

      //  Finally, the map is ready.  We go through the
      //  datastring translating one character at a time.
      //
      for (j = 0; j < len; j++)
        datastr[j] = map[datastr[j]];
    }
  else
    {
      //  No, we are not in replace mode, rather we are in delete mode
      //  so the map now says whether we're keeping the character or
      //  deleting the character.
      for (j = 0; j < 256; j++)
        map[j] = 1;
      for (j = 0; j < flen; j++)
        map[from[j]] = 0;
      for (j = 0, k = 0; j < len; j++)
        {
          if (map[datastr[j]])
            datastr[k++] = datastr[j];
        }
      len = k;
    }

  *datastrlen = len;
  result = len;

 done:
  //  drop the storage that we allocated (free(NULL) is harmless)
  //
  free (from);
  free (to);
  return (result);
}
/////////////////////////////////////////////////////////////////
//
// END of strntrn code (dual-licensed to both Yerazunis
// and Miller)
//
//////////////////////////////////////////////////////////////////
// syntax highlighted by Code2HTML, v. 0.9.1