// ---------------------------------------------------------------------------
// - Pattern.cpp -
// - afnix:txt module - pattern description class implementation -
// ---------------------------------------------------------------------------
// - This program is free software; you can redistribute it and/or modify -
// - it provided that this copyright notice is kept intact. -
// - -
// - This program is distributed in the hope that it will be useful, but -
// - without any warranty; without even the implied warranty of -
// - merchantability or fitness for a particular purpose. In no event shall -
// - the copyright holder be liable for any direct, indirect, incidental or -
// - special damages arising in any way out of the use of this software. -
// ---------------------------------------------------------------------------
// - copyright (c) 1999-2007 amaury darsch -
// ---------------------------------------------------------------------------
#include "Item.hpp"
#include "Vector.hpp"
#include "Pattern.hpp"
#include "Integer.hpp"
#include "Boolean.hpp"
#include "Character.hpp"
#include "QuarkZone.hpp"
#include "InputString.hpp"
namespace afnix {
// -------------------------------------------------------------------------
// - private section -
// -------------------------------------------------------------------------
// this structure permits to operate with an input stream that
// is prefixed by a string.
struct s_pis {
// the input stream
Input* p_is;
// the prefix string
String d_ps;
// the prefix stream
InputString d_ip;
// the escape character
t_quad d_ec;
// the read buffer
String d_buf;
// construct by stream
s_pis (Input* is) {
p_is = is;
d_ps = "";
d_ec = nilq;
reset ();
}
// construct by stream and prefix
s_pis (Input* is, const String ps) {
p_is = is;
d_ec = nilq;
d_ps = ps;
reset ();
}
// construct by stream, prefix and escape character
s_pis (Input* is, const String ps, const t_quad ec) {
p_is = is;
d_ec = nilq;
d_ps = ps;
d_ec = ec;
reset ();
}
// reset the prefixed stream
void reset (void) {
d_ip.set (d_ps);
d_buf = "";
}
// check for an escape character
bool isesc (const t_quad c) const {
if (d_ec == nilq) return false;
return (d_ec == c);
}
// read a character
t_quad rduc (void) {
// check first the prefix
if (d_ip.iseof () == false) {
return d_ip.rduc ();
}
// check the stream
if (!p_is) return eofc;
t_quad uc = p_is->rduc ();
if (uc == eofq) return uc;
// save in the pushback buffer
d_buf = d_buf + uc;
return uc;
}
// restore the input stream
void restore (void) {
if (!p_is) return;
if (d_buf.length () != 0) p_is->pushback (d_buf);
}
// check for a pattern and eventually consume
bool check (const String& pat, const bool pflg) {
// the check buffer
String buf;
// iterate in the pattern
long len = pat.length ();
for (long i = 0; i < len; i++) {
// update string pattern
t_quad uc = rduc ();
buf = buf + uc;
// check for escape
if (isesc (uc) == true) {
d_ip.pushback (buf);
return false;
}
// check for position
if (pat[i] != uc) {
d_ip.pushback (buf);
return false;
}
}
if (pflg == true) d_ip.pushback (buf);
return true;
}
// try to match with a start and end string patterns in balanced mode
String bmode (const String& sbs, const String& ebs) {
// check for initial matching
if (check (sbs, false) == false) {
restore ();
return "";
}
// initialize result
String result = sbs;
// accumulate until end match
while (check (ebs, true) == false) {
// get next character or escape
t_quad uc = rduc ();
// chekc for eof
if (uc == eofq) {
restore ();
return "";
}
// check for escape
if (isesc (uc) == true) {
t_quad nc = rduc ();
if (nc == eofq) {
restore ();
return "";
}
result = result + uc;
uc = nc;
}
// add the character
result = result + uc;
}
// the match occurs
result = result + ebs;
return result;
}
// try to match with a start and end string patterns in recursive mode
String rmode (const String& sbs, const String& ebs) {
// the recursive counter
long count = 0;
// check for initial matching
if (check (sbs, false) == false) {
restore ();
return "";
}
count++;
// initialize result
String result = sbs;
// loop for data
while (true) {
// check for start string
if (check (sbs, false) == true) {
result = result + sbs;
count++;
continue;
}
// check for end string
if (check (ebs, false) == true) {
result = result + ebs;
count--;
if (count == 0) break;
continue;
}
// get next character or escape
t_quad uc = rduc ();
// chekc for eof
if (uc == eofq) {
restore ();
return "";
}
// check for escape
if (isesc (uc) == true) {
t_quad nc = rduc ();
if (nc == eofq) {
restore ();
return "";
}
result = result + uc;
uc = nc;
}
// add the character
result = result + uc;
}
// the match occurs
return result;
}
};
// -------------------------------------------------------------------------
// - class section -
// -------------------------------------------------------------------------
// create an empty pattern: regex mode, no delimiters, no name,
// no escape character and a tag set to -1
Pattern::Pattern (void) : d_mode (REGEX), d_rtag (-1), d_escc (nilq) {
  d_name = "";
  d_sbs  = "";
  d_ebs  = "";
}
// create an unnamed regex pattern from a regex string
Pattern::Pattern (const String& re) :
  d_mode (REGEX), d_rtag (-1), d_escc (nilq) {
  d_regex = re;
  d_name  = "";
  d_sbs   = "";
  d_ebs   = "";
}
// create a named regex pattern from a regex string
Pattern::Pattern (const String& name, const String& re) :
  d_mode (REGEX), d_rtag (-1), d_escc (nilq) {
  d_regex = re;
  d_name  = name;
  d_sbs   = "";
  d_ebs   = "";
}
// create a named regex pattern from a regex object
Pattern::Pattern (const String& name, const Regex& re) :
  d_mode (REGEX), d_rtag (-1), d_escc (nilq) {
  d_regex = re;
  d_name  = name;
  d_sbs   = "";
  d_ebs   = "";
}
// create a named balanced pattern with an escape character
// the same control string is used as start and end delimiter
Pattern::Pattern (const String& name, const String& cs, const t_quad escc) :
  d_mode (BMODE), d_rtag (-1), d_escc (escc) {
  d_name = name;
  d_sbs  = cs;
  d_ebs  = cs;
}
// create a named balanced pattern with distinct start and end
// control strings and an escape character
Pattern::Pattern (const String& name, const String& sbs, const String& ebs,
                  const t_quad escc) :
  d_mode (BMODE), d_rtag (-1), d_escc (escc) {
  d_name = name;
  d_sbs  = sbs;
  d_ebs  = ebs;
}
// create a named balanced pattern with start and end control strings
// and no escape character
Pattern::Pattern (const String& name, const String& sbs, const String& ebs) :
  d_mode (BMODE), d_rtag (-1), d_escc (nilq) {
  d_name = name;
  d_sbs  = sbs;
  d_ebs  = ebs;
}
// create a named pattern with start and end control strings
// the flag selects the recursive mode when true, the balanced mode otherwise
Pattern::Pattern (const String& name, const String& sbs, const String& ebs,
                  const bool rfl) : d_rtag (-1), d_escc (nilq) {
  if (rfl == true) {
    d_mode = RMODE;
  } else {
    d_mode = BMODE;
  }
  d_name = name;
  d_sbs  = sbs;
  d_ebs  = ebs;
}
// copy construct this pattern
// the source pattern is read-locked during the copy; the lock that is
// released is the one taken on the source, not on the object being built
Pattern::Pattern (const Pattern& that) {
  that.rdlock ();
  try {
    d_mode  = that.d_mode;
    d_regex = that.d_regex;
    d_sbs   = that.d_sbs;
    d_ebs   = that.d_ebs;
    d_name  = that.d_name;
    d_rtag  = that.d_rtag;
    d_escc  = that.d_escc;
    that.unlock ();
  } catch (...) {
    that.unlock ();
    throw;
  }
}
// assign a pattern to this one
// self assignment is a no-op; otherwise the source is read-locked and
// this object is write-locked while the members are copied
Pattern& Pattern::operator = (const Pattern& that) {
  if (this != &that) {
    // lock the source first, then this object
    that.rdlock ();
    wrlock ();
    // copy the pattern description
    d_mode  = that.d_mode;
    d_regex = that.d_regex;
    d_sbs   = that.d_sbs;
    d_ebs   = that.d_ebs;
    d_name  = that.d_name;
    d_rtag  = that.d_rtag;
    d_escc  = that.d_escc;
    // release in reverse order
    unlock ();
    that.unlock ();
  }
  return *this;
}
// return the class name of this object
String Pattern::repr (void) const {
  String result = "Pattern";
  return result;
}
// set the pattern with a regex string
// the pattern switches to regex mode and the control strings are cleared
void Pattern::setregex (const String& re) {
  wrlock ();
  try {
    d_mode  = REGEX;
    d_regex = re;
    d_sbs   = "";
    d_ebs   = "";
  } catch (...) {
    // release the lock before propagating the error
    unlock ();
    throw;
  }
  unlock ();
}
// set the pattern with a regex object
// the pattern switches to regex mode and the control strings are cleared
void Pattern::setregex (const Regex& re) {
  wrlock ();
  try {
    d_mode  = REGEX;
    d_regex = re;
    d_sbs   = "";
    d_ebs   = "";
  } catch (...) {
    // release the lock before propagating the error
    unlock ();
    throw;
  }
  unlock ();
}
// set the pattern with the balanced control strings
// the pattern switches to balanced mode and the regex is reset
// @param sbs the start control string
// @param ebs the end control string
void Pattern::setbcs (const String& sbs, const String& ebs) {
  wrlock ();
  try {
    d_mode  = BMODE;
    d_regex = "";
    d_sbs   = sbs;
    // the end delimiter comes from its own argument, not from the start one
    d_ebs   = ebs;
    unlock ();
  } catch (...) {
    unlock ();
    throw;
  }
}
// set the pattern escape character under a write lock
void Pattern::setesc (const t_quad escc) {
  wrlock ();
  {
    d_escc = escc;
  }
  unlock ();
}
// return the pattern escape character, read under a read lock
t_quad Pattern::getesc (void) const {
  rdlock ();
  const t_quad result = d_escc;
  unlock ();
  return result;
}
// set the pattern name under a write lock
void Pattern::setname (const String& name) {
  wrlock ();
  {
    d_name = name;
  }
  unlock ();
}
// return a copy of the pattern name, taken under a read lock
String Pattern::getname (void) const {
  rdlock ();
  String name = d_name;
  unlock ();
  return name;
}
// set the pattern tag under a write lock
void Pattern::settag (const long rtag) {
  wrlock ();
  {
    d_rtag = rtag;
  }
  unlock ();
}
// return the pattern tag
long Pattern::gettag (void) const {
rdlock ();
long result = d_rtag;
unlock ();
return result;
}
// check a string with this pattern
// in regex mode the string is compared with the regex; in balanced and
// recursive modes the string is used as the prefix of a stream-less
// matcher and the match result must be the string itself
// a pattern-error is raised in recursive mode with identical delimiters
bool Pattern::check (const String& s) const {
  rdlock ();
  // recursive mode cannot work when both delimiters are the same
  const bool rsame = (d_mode == RMODE) && (d_sbs == d_ebs);
  if (rsame == true) {
    unlock ();
    throw Exception ("pattern-error", "recursive mode with same delimiters");
  }
  try {
    bool status = false;
    if (d_mode == REGEX) {
      status = (d_regex == s);
    } else if (d_mode == BMODE) {
      s_pis pis (nilp, s, d_escc);
      status = (pis.bmode (d_sbs, d_ebs) == s);
    } else if (d_mode == RMODE) {
      s_pis pis (nilp, s, d_escc);
      status = (pis.rmode (d_sbs, d_ebs) == s);
    }
    unlock ();
    return status;
  } catch (...) {
    unlock ();
    throw;
  }
}
// match an input stream with this pattern, using an empty prefix
String Pattern::match (Input* is) const {
  String result = match (is, "");
  return result;
}
// match an input stream with this pattern and a prefix
// the regex mode delegates to the regex object; the balanced and recursive
// modes run the prefixed stream matcher with the control strings
// a pattern-error is raised in recursive mode with identical delimiters
String Pattern::match (Input* is, const String& ps) const {
  rdlock ();
  // recursive mode cannot work when both delimiters are the same
  const bool rsame = (d_mode == RMODE) && (d_sbs == d_ebs);
  if (rsame == true) {
    unlock ();
    throw Exception ("pattern-error", "recursive mode with same delimiters");
  }
  try {
    String result = "";
    if (d_mode == REGEX) {
      result = d_regex.match (is, ps);
    } else if (d_mode == BMODE) {
      s_pis pis (is, ps, d_escc);
      result = pis.bmode (d_sbs, d_ebs);
    } else if (d_mode == RMODE) {
      s_pis pis (is, ps, d_escc);
      result = pis.rmode (d_sbs, d_ebs);
    }
    unlock ();
    return result;
  } catch (...) {
    unlock ();
    throw;
  }
}
// -------------------------------------------------------------------------
// - object section -
// -------------------------------------------------------------------------
// the object eval quarks
// these are interned globally through String::intern and are used by
// Pattern::meval to build the REGEX, BALANCED and RECURSIVE items
static const long QUARK_REGEX = String::intern ("REGEX");
static const long QUARK_PATTERN = String::intern ("Pattern");
static const long QUARK_BALANCED = String::intern ("BALANCED");
static const long QUARK_RECURSIVE = String::intern ("RECURSIVE");
// the quark zone
// the zone is created with a length of 10, which is the number of method
// quarks interned in it below; it must be defined before those quarks
static const long QUARK_ZONE_LENGTH = 10;
static QuarkZone zone (QUARK_ZONE_LENGTH);
// the object supported quarks
// each quark maps a method name to the identifier tested by Pattern::apply
// and Pattern::isquark
static const long QUARK_CHECK = zone.intern ("check");
static const long QUARK_MATCH = zone.intern ("match");
static const long QUARK_SETBCS = zone.intern ("set-balanced");
static const long QUARK_SETESC = zone.intern ("set-escape");
static const long QUARK_GETESC = zone.intern ("get-escape");
static const long QUARK_SETPTAG = zone.intern ("set-tag");
static const long QUARK_GETPTAG = zone.intern ("get-tag");
static const long QUARK_SETNAME = zone.intern ("set-name");
static const long QUARK_GETNAME = zone.intern ("get-name");
static const long QUARK_SETREGEX = zone.intern ("set-regex");
// evaluate a quark statically
// the REGEX, BALANCED and RECURSIVE quarks are mapped to an item built
// with the Pattern quark and the requested quark; any other quark raises
// an eval-error - the runnable and nameset arguments are not used here
Object* Pattern::meval (Runnable* robj, Nameset* nset, const long quark) {
  const bool known = (quark == QUARK_REGEX)    ||
                     (quark == QUARK_BALANCED) ||
                     (quark == QUARK_RECURSIVE);
  if (known == false) {
    throw Exception ("eval-error", "cannot evaluate member",
                     String::qmap (quark));
  }
  return new Item (QUARK_PATTERN, quark);
}
// create a new object in a generic way
// the argument count selects the constructor:
//   0 - empty pattern
//   1 - regex string
//   2 - name and regex (string or regex object)
//   3 - name, control string and end string or escape character
//   4 - name, start string, end string and escape character or boolean flag
// an argument-error is raised for any other count or unsupported object
Object* Pattern::mknew (Vector* argv) {
  const long argc = (argv == nilp) ? 0 : argv->length ();
  switch (argc) {
  case 0:
    return new Pattern;
  case 1:
    {
      String re = argv->getstring (0);
      return new Pattern (re);
    }
  case 2:
    {
      // the pattern name comes first, then the regex description
      String  name = argv->getstring (0);
      Object* obj  = argv->get (1);
      // regex given as a string
      String* sobj = dynamic_cast <String*> (obj);
      if (sobj != nilp) return new Pattern (name, *sobj);
      // regex given as an object
      Regex* robj = dynamic_cast <Regex*> (obj);
      if (robj != nilp) return new Pattern (name, *robj);
      throw Exception ("argument-error", "invalid arguments with pattern");
    }
  case 3:
    {
      String  name = argv->getstring (0);
      String  cs   = argv->getstring (1);
      Object* obj  = argv->get (2);
      // a string is the end control string
      String* sobj = dynamic_cast <String*> (obj);
      if (sobj != nilp) return new Pattern (name, cs, *sobj);
      // a character is the escape character
      Character* cobj = dynamic_cast <Character*> (obj);
      if (cobj != nilp) {
        t_quad escc = cobj->toquad ();
        return new Pattern (name, cs, escc);
      }
      throw Exception ("argument-error", "invalid arguments with pattern");
    }
  case 4:
    {
      String  name = argv->getstring (0);
      String  sbs  = argv->getstring (1);
      String  ebs  = argv->getstring (2);
      Object* obj  = argv->get (3);
      // a character is the escape character
      Character* cobj = dynamic_cast <Character*> (obj);
      if (cobj != nilp) {
        t_quad escc = cobj->toquad ();
        return new Pattern (name, sbs, ebs, escc);
      }
      // a boolean is the recursive flag
      Boolean* bobj = dynamic_cast <Boolean*> (obj);
      if (bobj != nilp) {
        bool rfl = bobj->toboolean ();
        return new Pattern (name, sbs, ebs, rfl);
      }
      throw Exception ("argument-error", "invalid arguments with pattern");
    }
  default:
    break;
  }
  throw Exception ("argument-error", "too many arguments with pattern");
}
// return true if the given quark is defined
// the local quark zone is searched first; the base object is consulted
// only when the quark is not local and the hierarchy flag is set
bool Pattern::isquark (const long quark, const bool hflg) const {
  rdlock ();
  bool status = true;
  if (zone.exists (quark) == false) {
    status = hflg ? Object::isquark (quark, hflg) : false;
  }
  unlock ();
  return status;
}
// apply this object with a set of arguments and a quark
// the dispatch is done on the argument count first, then on the quark;
// anything that is not handled here is forwarded to the base object
Object* Pattern::apply (Runnable* robj, Nameset* nset, const long quark,
                        Vector* argv) {
  // get the number of arguments
  const long argc = (argv == nilp) ? 0 : argv->length ();
  switch (argc) {
  case 0:
    // the getters
    if (quark == QUARK_GETESC)  return new Character (getesc ());
    if (quark == QUARK_GETNAME) return new String (getname ());
    if (quark == QUARK_GETPTAG) return new Integer (gettag ());
    break;
  case 1:
    // the simple setters
    if (quark == QUARK_SETNAME) {
      String name = argv->getstring (0);
      setname (name);
      return nilp;
    }
    if (quark == QUARK_SETPTAG) {
      long tag = argv->getint (0);
      settag (tag);
      return nilp;
    }
    if (quark == QUARK_SETESC) {
      t_quad escc = argv->getchar (0);
      setesc (escc);
      return nilp;
    }
    // check a string against the pattern
    if (quark == QUARK_CHECK) {
      String sval = argv->getstring (0);
      return new Boolean (check (sval));
    }
    // match either an input stream or a string used as a prefix
    if (quark == QUARK_MATCH) {
      Object* obj = argv->get (0);
      Input* iobj = dynamic_cast <Input*> (obj);
      if (iobj != nilp) return new String (match (iobj));
      String* sobj = dynamic_cast <String*> (obj);
      if (sobj != nilp) return new String (match (nilp, *sobj));
      throw Exception ("type-error", "invalid object with match ",
                       Object::repr (obj));
    }
    // set the regex from a string or a regex object
    if (quark == QUARK_SETREGEX) {
      Object* obj = argv->get (0);
      String* sre = dynamic_cast <String*> (obj);
      if (sre != nilp) {
        setregex (*sre);
        return nilp;
      }
      Regex* ore = dynamic_cast <Regex*> (obj);
      if (ore != nilp) {
        setregex (*ore);
        return nilp;
      }
      throw Exception ("type-error", "invalid object with set-regex ",
                       Object::repr (obj));
    }
    // a single control string is used as start and end delimiter
    if (quark == QUARK_SETBCS) {
      String bs = argv->getstring (0);
      setbcs (bs, bs);
      return nilp;
    }
    break;
  case 2:
    // match an input stream with a prefix string
    if (quark == QUARK_MATCH) {
      Object* obj  = argv->get (0);
      Input*  iobj = dynamic_cast <Input*> (obj);
      // the prefix is extracted before the stream is validated
      String ps = argv->getstring (1);
      if (iobj != nilp) return new String (match (iobj, ps));
      throw Exception ("type-error", "invalid object with match ",
                       Object::repr (obj));
    }
    // set the start and end control strings
    if (quark == QUARK_SETBCS) {
      String sbs = argv->getstring (0);
      String ebs = argv->getstring (1);
      setbcs (sbs, ebs);
      return nilp;
    }
    break;
  default:
    break;
  }
  // call the object method
  return Object::apply (robj, nset, quark, argv);
}
}
// syntax highlighted by Code2HTML, v. 0.9.1