#include "RegEx.h"
#include <stdio.h>
#include <string.h>
extern void Exit(const char*);
//void Exit(const char*){}
RegEx::RegEx()
{
next = NULL;
value = NULL;
repeat = false;
plainstring = true;
valuelen = 0;
}
RegEx::RegEx(const char *s)
{
RegEx::RegEx();
Load(s);
}
RegEx::~RegEx()
{
if (next != NULL) delete next;
if (value != NULL) delete value;
}
void RegEx::Load(const char* s)
{
int stringlen = strlen(s);
if (!SetContains(s, '('))
{
//it's just a string
value = new char[stringlen + 1];
strcpy(value, s);
value[stringlen] = 0;
plainstring = true;
repeat = false;
valuelen = stringlen;
//printf("string %s\n", value);
next = NULL;
#ifdef REGEX_DEBUG
printf("loaded string %s, length=%i\n",value,valuelen);
#endif
return;
}
int setbegin = WhereInSet(s, '(');
if (setbegin > 0)
{
//first build the string
value = new char[setbegin+1];
strncpy(value, s, setbegin);
value[setbegin+1] = 0;
plainstring = true;
repeat = false;
valuelen = strlen(value);
#ifdef REGEX_DEBUG
printf("begin string %s, length=%i\n", value,valuelen);
#endif
//then tack on the ending regex
next = new RegEx();
next->Load(&s[setbegin]);
}
else //setbegin == 0
{
//build the set from here
int setend = WhereInSet(s, ')');
if (setend == -1)
{
Exit("Error building regular expression set. No set close found. Ignoring.\n");
}
int beginrange=-1, endrange=-1;
int index = 1;
char tempset[256];
memset(tempset, 0, 256);
while(index < setend)
{
#ifdef REGEX_DEBUG
printf("starting char range search\n");
#endif
if (s[index] != '\'')
{
Exit("Error building regular expression set. Invalid range definition. Ignoring.\n");
}
beginrange = ParseChar(s,index); //auto-increments index past char close
#ifdef REGEX_DEBUG
printf("beginrange=%c\n",beginrange);
#endif
if (s[index] == '-')
{
//find the end of the range
index++;
endrange = ParseChar(s,index);
}
else if(s[index] == ',' || index==setend)
{
//range is one character
#ifdef REGEX_DEBUG
printf("found comma: index=%i, setend=%i\n",index, setend);
#endif
endrange = beginrange;
index++;
}
else if(index < setend)
{
//oops! invalid
Exit("Error building regular expression set. Invalid range definition. Ignoring.\n");
}
for(int i=beginrange; i<=endrange; i++)
if(i>0) tempset[strlen(tempset)] = i;
if (s[index]==',') index++;
}
plainstring = false;
valuelen = strlen(tempset);
value = new char[valuelen+1];
memset(value, 0, valuelen+1);
strcpy(value, tempset);
//check if it's a repeating set
if (setend+1 < stringlen && s[setend+1]=='*')
repeat = true;
else
repeat = false;
#ifdef REGEX_DEBUG
printf("set %s %s\n",value,(repeat?"repeats":"does not repeat"));
#endif
//next regex
if (setend+(repeat ? 2 : 1) < stringlen)
{
next = new RegEx();
next->Load(&s[setend+(repeat?2:1)]);
}
else
{
next = NULL;
return;
}
}
}
int RegEx::Matches(const char* s) const
{
int stringlen = strlen(s);
if (plainstring)
{
#ifdef REGEX_DEBUG
printf("searching for plainstring match %s\n",value);
printf("stringlen=%i, valuelen=%i\n",stringlen,valuelen);
#endif
if (stringlen < valuelen) return -1;
if (valuelen == stringlen) return (strcmp(s,value) ? -1 : valuelen);
if (strncmp(s,value,valuelen)) return -1;
if (!next) return valuelen;
int rest = next->Matches(&s[valuelen]);
return (rest==-1 ? -1 : rest + valuelen);
}
//else we've got a set
if (!repeat)
{
#ifdef REGEX_DEBUG
printf("searching non-repeating set %s\n",value);
#endif
if (stringlen <= 1) return -1;
if (!SetContains(value, s[0])) return -1;
if (next == NULL) return 1;
int rest = next->Matches(&s[1]);
return (rest==-1 ? -1 : rest + 1);
}
//else repeat it any number of times
#ifdef REGEX_DEBUG
printf("searching repeating set %s\n",value);
#endif
int index = 0;
while(SetContains(value, s[index])) index++;
if (next == NULL) return index;
int rest = next->Matches(&s[index]);
return (rest==-1 ? -1 : rest + index);
}
bool RegEx::SetContains(const char* set, char c) const
{
int setlen = strlen(set);
for(int i=0; i<setlen; i++)
if (c==set[i]) return true;
return false;
}
int RegEx::WhereInSet(const char* set, char c) const
{
int setlen = strlen(set);
for(int i=0; i<setlen; i++)
if (c==set[i]) return i;
return -1;
}
char RegEx::ParseChar(const char* s, int &i) const
{
if(s[i] == '\'' && s[i+2] == '\'')
{
char c = s[i+1];
i+=3;
return c;
}
Exit("Error parsing character in regular expression\n");
return -1;
}
/*
int main()
{
//char expression[] = "$('0'-'9','a'-'f','A'-'F')('0'-'9','a'-'f','A'-'F')*";
char expression[] = "//(' '-'~','\r')*\n";
RegEx r;
r.Load(expression);
printf("%i\n",r.Matches("//hello123\nabcdefghijklmnop"));
return 0;
}
*/
syntax highlighted by Code2HTML, v. 0.9.1