Module: jam-internals Author: Peter S. Housel Copyright: Original Code is Copyright 2004 Gwydion Dylan Maintainers All rights reserved. License: Functional Objects Library Public License Version 1.0 Dual-license: GNU Lesser General Public License Warranty: Distributed WITHOUT WARRANTY OF ANY KIND define constant $regular-expression-cache :: = make(); define function parse-regular-expression (string :: ) => (node :: ); local method parse-regexp0 (string :: , start :: ) => (node :: , pos :: ); let (node :: , pos :: ) = parse-regexp1(string, start); while (pos < string.size & string[pos] == '|') let (new-node :: , new-pos :: ) = parse-regexp1(string, pos + 1); node := make(, union1: node, union2: new-node); pos := new-pos; end while; values(node, pos); end, method parse-regexp1 (string :: , start :: ) => (node :: , pos :: ); let (node :: , pos :: ) = parse-regexp2(string, start); while (pos < string.size & string[pos] ~== '|' & string[pos] ~== ')') let (new-node :: , new-pos :: ) = parse-regexp2(string, pos); node := make(, head: node, tail: new-node); pos := new-pos; end while; values(node, pos); end, method parse-regexp2 (string :: , start :: ) => (node :: , pos :: ); let (node :: , pos :: ) = parse-regexp3(string, start); if (pos < string.size) if (string[pos] == '*') let new-node = make(, of: node); values(new-node, pos + 1); elseif (string[pos] == '+') let new-node = make(, head: node, tail: make(, of: copy-regular-expression(node))); values(new-node, pos + 1); elseif (string[pos] == '?') let new-node = make(, union1: make(), union2: node); values(new-node, pos + 1); else values(node, pos); end if; else values(node, pos); end if; end, method parse-regexp3 (string :: , start :: ) => (node :: , pos :: ); if (start >= string.size) error("regexp missing at end of '%s'", string); else if (string[start] == '(') let (node :: , pos :: ) = parse-regexp0(string, start + 1); if (pos >= string.size | string[pos] ~== ')') error("closing ')' missing in regular expression '%s'", string); else values(node, pos + 1); end if; elseif (string[start] == '\\' & start < string.size - 1) values(make(, symbol: as(, string[start + 1])), start + 2); elseif (string[start] == '.') let dot-set = make(, upper-bound-hint: 256); for (symbol :: from 0 below 256) if (symbol ~= as(, '\n')) set-add!(dot-set, symbol); end; end for; values(make(, symbol-set: dot-set), start + 1); elseif (string[start] == '[') let cclass-set = make(, upper-bound-hint: 256); let pos = start + 1; let complement? = if (pos < string.size & string[pos] = '^') start := start + 1; pos := pos + 1; #t; else #f end if; while (pos < string.size & (string[pos] ~== ']' | pos = start + 1)) if (pos + 2 < string.size & string[pos + 1] == '-') for (symbol :: from as(, string[pos]) to as(, string[pos + 2])) set-add!(cclass-set, symbol); end for; pos := pos + 3; else set-add!(cclass-set, as(, string[pos])); pos := pos + 1; end; end while; if (pos = string.size) error("closing ']' missing in regexp '%s'", string); end if; if (complement?) let complement-set = make(, upper-bound-hint: 256); for (symbol :: from 0 below 256) unless (member?(symbol, cclass-set)) add!(complement-set, symbol) end; end for; values(make(, symbol-set: complement-set), pos + 1); else values(make(, symbol-set: cclass-set), pos + 1); end; else values(make(, symbol: as(, string[start])), start + 1); end if; end if; end; let cached-node = element($regular-expression-cache, string, default: #f); if (cached-node) cached-node else let (node :: , pos :: ) = parse-regexp0(string, 0); if (pos < string.size) error("regular expression \"%s\" ended prematurely at position %d", string, pos) end if; $regular-expression-cache[string] := node end if; end function; define class () // no slots end class; define class () slot match-dfa-state-accepting? :: = #f; end class; define sealed method do-regular-expression-dfa-state-position (state :: , position :: , #key deterministic? = #f) => (); state.match-dfa-state-accepting? := #t; end method; define function jam-builtin-match (jam :: , regexps :: , strings :: ) => (result :: ); error("the Match built-in rule is not yet implemented"); #[] end function;