Module: java-parser Author: Gail Zacharias Copyright: Original Code is Copyright (c) 1995-2004 Functional Objects, Inc. All rights reserved. License: Functional Objects Library Public License Version 1.0 Dual-license: GNU Lesser General Public License Warranty: Distributed WITHOUT WARRANTY OF ANY KIND // should this inherit from as well? define abstract class () end; define abstract class () end; define abstract class () end; define abstract class () end; define abstract class () end; define abstract class () end; define abstract class () end; define abstract class () end; define generic token-parse-class (x :: ); define grammar-sequence (); define grammar-sequence (); define constant = limited(, of: ); define grammar-sequence (); define grammar-sequence (); define grammar-sequence (); // Literals define abstract class (, ) end; define method token-parse-class (x :: ) => (parse-class) $literal-token end; define generic literal-value (x :: ) => (value); define abstract class () end; define class () end; define class () end; define method literal-value (x :: ) => (value) #t end; define method literal-value (x :: ) => (value) #f end; define class () end; define method literal-value (x :: ) => (value) #f end; define abstract class () constant slot literal-value :: , required-init-keyword: value:; end; define generic integer-literal-radix (lit :: ) => (radix :: ); define abstract class () constant class slot integer-literal-radix :: = 16; end; define abstract class () constant class slot integer-literal-radix :: = 10; end; define abstract class () constant class slot integer-literal-radix :: = 8; end; define abstract class () end; define abstract class () end; define class (, ) end; define class (, ) end; define class (, ) end; define class (, ) end; define class (, ) end; define class (, ) end; define abstract class () constant slot float-literal-mantissa :: , required-init-keyword: int:; constant slot float-literal-scale :: , required-init-keyword: scale:; constant slot float-literal-exponent :: , required-init-keyword: expt:; end; define class () end; define class () end; define method literal-value (x :: ) => (val :: ) // TODO: do better by more direct conversion. let exponent = generic--(x.float-literal-exponent, x.float-literal-scale); let m :: = as(, x.float-literal-mantissa); let e :: = as(, 10) ^ exponent; generic-*(m, e); end; define class () constant slot literal-value :: , required-init-keyword: value:; end; define abstract class () end; define class () constant slot literal-value :: , required-init-keyword: value:; end; define class () constant slot literal-value :: , required-init-keyword: value:; end; // define class (, ) constant slot identifier-name :: , required-init-keyword: name:; end; define method name-identifiers (id :: ) make(, size: 1, fill: id); end; define method token-parse-class (x :: ) $identifier-token end; define class (, ) constant slot primitive-type-symbol :: , required-init-keyword: name:; end; define method type-name (type :: ) => (name :: ) type end; define method type-numdims (type :: ) => (n :: singleton(0)) 0 end; define method token-parse-class (x :: ) $primitive-type-token end; define class (, ) end; define method token-parse-class (x :: ) $%this-token end; define class (, ) end; define method token-parse-class (x :: ) $%super-token end; // Note that special tokens which are not operators never get seen outside // of here... define class () constant slot token-parse-class, required-init-keyword: class:; end; define class () constant slot operator-symbol :: , required-init-keyword: name:; end; define class () constant slot assignment-operator :: false-or(), required-init-keyword: op:; end; define class () constant slot lexer-identifiers :: = make(); end; define method make-java-lexer (contents :: ) => (fn :: ) let ls = make(); let posn :: = 0; local method next-token () => (next-token-class, next-token-value) let (token, new-pos) = get-token-from-contents($java-tokenizer, contents, posn, ls); posn := new-pos; if (token == #f) values($eof-token, #f) else values(token.token-parse-class, token) end; end method; next-token end method make-java-lexer; define method make-java-lexer (stream :: ) => (fn :: ) let bytes = stream.stream-size; let contents :: = make(, size: bytes); read-into!(stream, bytes, contents); make-java-lexer(contents) end method make-java-lexer; define method make-java-lexer (file :: ) => (fn :: ) let contents :: = with-open-file (stream = file, element-type: ) read-to-end(stream) end with-open-file; make-java-lexer(contents) end method make-java-lexer; define method make-java-lexer (source :: ) => (fn :: ) let contents :: = as(, source); make-java-lexer(contents); end method make-java-lexer; define constant $predefined-tokens :: = make(); define inline-only function def-reserved (name, class, #rest keys) $predefined-tokens[name] := apply(make, class, name: as(, name), keys); end; begin $predefined-tokens["true"] := make(); $predefined-tokens["false"] := make(); $predefined-tokens["null"] := make(); // reserved but not used def-reserved("const", , class: -1); def-reserved("goto", , class: -1); def-reserved("abstract", , class: $%abstract-token); def-reserved("boolean", ); def-reserved("break", , class: $%break-token); def-reserved("byte", ); def-reserved("case", , class: $%case-token); def-reserved("catch", , class: $%catch-token); def-reserved("char", ); def-reserved("class", , class: $%class-token); def-reserved("continue", , class: $%continue-token); def-reserved("default", , class: $%default-token); def-reserved("do", , class: $%do-token); def-reserved("double", ); def-reserved("else", , class: $%else-token); def-reserved("extends", , class: $%extends-token); def-reserved("final", , class: $%final-token); def-reserved("finally", , class: $%finally-token); def-reserved("float", ); def-reserved("for", , class: $%for-token); def-reserved("if", , class: $%if-token); def-reserved("implements", , class: $%implements-token); def-reserved("import", , class: $%import-token); def-reserved("instanceof", , class: $%instanceof-token); def-reserved("int", ); def-reserved("interface", , class: $%interface-token); def-reserved("long", ); def-reserved("native", , class: $%native-token); def-reserved("new", , class: $%new-token); def-reserved("package", , class: $%package-token); def-reserved("private", , class: $%private-token); def-reserved("protected", , class: $%protected-token); def-reserved("public", , class: $%public-token); def-reserved("return", , class: $%return-token); def-reserved("short", ); def-reserved("static", , class: $%static-token); def-reserved("super", ); def-reserved("switch", , class: $%switch-token); def-reserved("synchronized", , class: $%synchronized-token); def-reserved("this", ); def-reserved("throw", , class: $%throw-token); def-reserved("throws", , class: $%throws-token); def-reserved("transient", , class: $%transient-token); def-reserved("try", , class: $%try-token); def-reserved("void", , class: $%void-token); def-reserved("volatile", , class: $%volatile-token); def-reserved("while", , class: $%while-token); def-reserved("(", , class: $%lparen-token); def-reserved(")", , class: $%rparen-token); def-reserved("{", , class: $%lbrace-token); def-reserved("}", , class: $%rbrace-token); def-reserved("[", , class: $%lbracket-token); def-reserved("]", , class: $%rbracket-token); def-reserved(";", , class: $%semi-colon-token); def-reserved(",", , class: $%comma-token); def-reserved(".", , class: $%dot-token); def-reserved("?", , class: $%qmark-token); def-reserved(":", , class: $%colon-token); def-reserved("=", , class: $%=-token, op: #f); def-reserved(">", , class: $%>-token); def-reserved("<", , class: $%<-token); def-reserved("!", , class: $%!-token); def-reserved("~", , class: $%~-token); def-reserved("==", , class: $%==-token); def-reserved(">=", , class: $%>=-token); def-reserved("<=", , class: $%<=-token); def-reserved("!=", , class: $%!=-token); def-reserved("&&", , class: $%&&-token); def-reserved("||", , class: $%||-token); def-reserved("+", , class: $%+-token); def-reserved("-", , class: $%--token); def-reserved("*", , class: $%*-token); def-reserved("/", , class: $%/-token); def-reserved("&", , class: $%&-token); def-reserved("|", , class: $%|-token); def-reserved("^", , class: $%^-token); def-reserved("%", , class: $%%-token); def-reserved(">>", , class: $%>>-token); def-reserved("<<", , class: $%<<-token); def-reserved(">>>", , class: $%>>>-token); def-reserved("++", , class: $%++-token, op: $predefined-tokens["+"]); def-reserved("--", , class: $%---token, op: $predefined-tokens["-"]); def-reserved("+=", , class: $%+=-token, op: $predefined-tokens["+"]); def-reserved("-=", , class: $%-=-token, op: $predefined-tokens["-"]); def-reserved("*=", , class: $%*=-token, op: $predefined-tokens["*"]); def-reserved("/=", , class: $%/=-token, op: $predefined-tokens["/"]); def-reserved("&=", , class: $%&=-token, op: $predefined-tokens["&"]); def-reserved("|=", , class: $%|=-token, op: $predefined-tokens["|"]); def-reserved("^=", , class: $%^=-token, op: $predefined-tokens["^"]); def-reserved("%=", , class: $%%=-token, op: $predefined-tokens["%"]); def-reserved("<<=", , class: $%<<=-token, op: $predefined-tokens["<<"]); def-reserved(">>=", , class: $%>>=-token, op: $predefined-tokens[">>"]); def-reserved(">>>=", , class: $%>>>=-token, op: $predefined-tokens[">>>"]); end; define function make-identifier (ls :: , string :: ) => (token :: ) element($predefined-tokens, string, default: #f) | element(ls.lexer-identifiers, string, default: #f) | (ls.lexer-identifiers[string] := make(, name: string)) end; define function parse-decimal-literal (ls :: , string :: ) => (token :: ) let length = string.size; iterate loop (posn :: = 0, res :: = 0) if (posn < length) let digit :: = as(, string[posn]); if (digit == as(, 'l') | digit == as(, 'L')) make(, value: res) else loop(posn + 1, generic-+(generic-*(res, 10), digit - 48)) end else make(, value: res) end; end iterate; end; define function parse-octal-literal (ls :: , string :: ) => (token :: ) let length = string.size; iterate loop (posn :: = 0, res :: = 0) if (posn < length) let digit :: = as(, string[posn]); if (digit == as(, 'l') | digit == as(, 'L')) make(, value: res) else loop(posn + 1, generic-+(generic-*(res, 8), digit - 48)) end else make(, value: res) end; end iterate; end; define function parse-hex-literal (ls :: , string :: ) => (token :: ) let length = string.size; iterate loop (posn :: = 2, res :: = 0) if (posn < length) let digit :: = logior(32, as(, string[posn])); if (digit == as(, 'l')) make(, value: res) else let value = if (digit >= 97) digit - 87 else digit - 48 end; loop(posn + 1, generic-+(generic-*(res, 16), value)) end else make(, value: res) end; end iterate; end; define function parse-float-literal (ls :: , string :: ) => (token :: ) let length = string.size; local method edigits (m :: , s :: , sign :: one-of(1, -1), posn :: , res :: ) if (posn < length) let digit = as(, string[posn]); if (digit == as(, 'f') | digit == as(, 'F')) make(, mantissa: m, scale: s, expt: generic-*(res, sign)); elseif (digit == as(, 'D') | digit == as(, 'D')) make(, mantissa: m, scale: s, expt: generic-*(res, sign)); else edigits(m, s, sign, posn + 1, generic-+(generic-*(res, 10), digit)) end else make(, mantissa: m, scale: s, expt: generic-*(res, sign)); end; end method; local method fexpt (m :: , s :: , posn :: ) let sign = string[posn]; if (sign == '+') edigits(m, s, 1, posn + 1, 0) elseif (sign == '-') edigits(m, s, -1, posn + 1, 0) else edigits(m, s, 1, posn, 0) end; end method; local method fdigits (posn :: , m :: , s :: ) if (posn < length) let digit :: = as(, string[posn]); if (digit == as(, 'e') | digit == as(, 'E')) fexpt(m, s, posn + 1); elseif (digit == as(, 'f') | digit == as(, 'F')) make(, mantissa: m, scale: s, expt: 0); elseif (digit == as(, 'D') | digit == as(, 'D')) make(, mantissa: m, scale: s, expt: 0); else fdigits(posn + 1, generic-+(generic-*(m, 10), digit), s + 1); end else make(, mantissa: m, scale: s, expt: 0); end; end method; iterate idigits (posn :: = 0, res :: = 0) let digit :: = as(, string[posn]); if (digit == as(, '.')) fdigits(posn + 1, res, 0) elseif (digit == as(, 'e') | digit == as(, 'E')) fexpt(res, 0, posn + 1); elseif (digit == as(, 'f') | digit == as(, 'F')) make(, mantissa: res, scale: 0, expt: 0); elseif (digit == as(, 'D') | digit == as(, 'D')) make(, mantissa: res, scale: 0, expt: 0); else idigits(posn + 1, generic-+(generic-*(res, 10), digit)) end end iterate; end; define function parse-escape (string, posn) let ch = string[posn]; let nposn = posn + 1; select (ch) 'b' => values(8, nposn); 't' => values(as(, '\t'), nposn); 'n' => values(as(, '\n'), nposn); 'f' => values(as(, '\f'), nposn); 'r' => values(as(, '\r'), nposn); '"' => values(as(, '"'), nposn); '\'' => values(as(, '\''), nposn); '\\' => values(as(, '\\'), nposn); otherwise => iterate loop (nposn :: = posn, res :: = 0) let digit = as(, string[nposn]); if (nposn < posn + 3 & 48 <= digit & digit <= 55) loop(nposn + 1, res * 8 + digit - 48) else values(res, nposn) end end iterate; end; end; define function parse-character-literal (ls :: , string :: ) => (token :: ) let ch = string[1]; let n = if (ch ~== '\\') as(, ch) else parse-escape(string, 2) end; make(, value: n); end; define function parse-string-literal (ls :: , string :: ) => (token :: ) iterate loop (inpos :: = 1, opos :: = 0) let ch = string[inpos]; if (ch == '\"') make(, value: copy-sequence(string, end: opos)); elseif (ch ~== '\\') string[opos] := ch; loop(inpos + 1, opos + 1); else let (nch, npos) = parse-escape(string, inpos + 1); if (nch >= 255) // oops parse-unicode-string-literal(ls, string) else string[opos] := as(, nch); loop(npos, opos + 1); end; end; end iterate; end; define function parse-unicode-string-literal (ls :: , string :: ) => (token :: ) let v :: = make(, size: string.size - 2, fill: as(, 0)); iterate loop (inpos :: = 1, opos :: = 0) let ch = string[inpos]; if (ch == '\"') make(, value: if (opos == v.size) v else copy-sequence(v, end: opos) end); elseif (ch ~== '\\') v[opos] := as(, ch); loop(inpos + 1, opos + 1); else let (nch, npos) = parse-escape(string, inpos + 1); v[opos] := as(, nch); loop(npos, opos + 1); end; end iterate; end; define state-machine $java-tokenizer state start: #f, start: " \t\f\r\n\<0B>\<1A>", // vertical tab, 1A = ^Z slash: '/', symbol: "A-Za-z_$", zero: '0', decimal-digits: "1-9", dot: '.', char-quote: '\'', string-quote: '\"', operator: "(){}[];,~?:", operator-pre-equal: "=!*^%", // ! != and: '&', // & && &= or: '|', plus: '+', minus: '-', greater: '>', // > >= >> >>> >>= >>>= lesser: '<'; // < <= << <<= state slash: make-identifier, single-line-comment: '/', multi-line-comment: '*', slash-equal: '='; state single-line-comment: #f, start: "\r\n", single-line-comment: "\<00>-\<09>\<0B>\<0C>\<0E>-\"; state multi-line-comment: #f, multi-line-comment-star: '*', multi-line-comment: "\<00>-)+-\"; state multi-line-comment-star: #f, start: '/', multi-line-comment-star: '*', multi-line-comment: "\<00>-)+-.0-\"; state slash-equal: make-identifier; state symbol: make-identifier, symbol: "A-Za-z_$0-9"; state dot: make-identifier, float-after-dot: "0-9"; state zero: parse-decimal-literal, decimal-parsed: "Ll", hex: "xX", octal: "0-7", decimal-digits: "89", float-after-dot: ".", float-after-expt: "Ee", float-parsed: "fdFD"; state decimal-digits: parse-decimal-literal, decimal-parsed: "Ll", decimal-digits: "0-9", float-after-dot: ".", float-after-expt: "Ee", float-parsed: "fdFD"; state decimal-parsed: parse-decimal-literal; state octal: parse-octal-literal, octal-parsed: "Ll", octal: "0-7", float-digits: "89", // Can't be decimal, because started with 0. float-after-dot: ".", float-after-expt: "Ee", float-parsed: "fdFD"; state octal-parsed: parse-octal-literal; state hex: parse-hex-literal, hex-parsed: "Ll", hex: "0-9A-Fa-f"; state hex-parsed: parse-hex-literal; state float-digits: parse-float-literal, float-digits: "0-9", float-after-dot: ".", float-after-expt: "Ee", float-parsed: "fdFD"; state float-after-dot: parse-float-literal, float-after-dot: "0-9", float-after-expt: "Ee", float-parsed: "fdFD"; state float-after-expt: parse-float-literal, float-in-expt: "0-9+", float-in-expt: '-', float-parsed: "fdFD"; state float-in-expt: parse-float-literal, float-in-expt: "0-9", float-parsed: "fdFD"; state float-parsed: parse-float-literal; state char-quote: #f, close-char-quote: "\<00>-\<09>\<0B>\<0C>\<0E>-&(-[]-\", char-escape: '\\'; state char-escape: #f, close-char-quote: "\\'\"btnfr", octal-char-quote-2: "0-3", octal-char-quote-1: "4-7"; state octal-char-quote-2: #f, octal-char-quote-1: "0-7", character: '\''; state octal-char-quote-1: #f, close-char-quote: "0-7", character: '\''; state close-char-quote: #f, character: '\''; state character: parse-character-literal; state string-quote: #f, string: '"', string-escape: '\\', string-quote: "\<00>-\<09>\<0B>\<0C>\<0E>-!#-[]-\"; state string-escape: #f, string-quote: "\\'\"btnfr0-7"; state string: parse-string-literal; state and: make-identifier, // & && &= operator: '&', operator: '='; state or: make-identifier, // | || |= operator: '|', operator: '='; state plus: make-identifier, // + ++ += operator: '+', operator: '='; state minus: make-identifier, // - -- -= operator: '-', operator: '='; state greater: make-identifier, // > >= >> >>> >>= >>>= operator: '=', greater-greater: '>'; state greater-greater: make-identifier, // >> >>> >>= >>>= operator: '=', operator-pre-equal: '>'; state lesser: make-identifier, // < <= << <<= operator: '=', operator-pre-equal: '<'; state operator-pre-equal: make-identifier, operator: '='; state operator: make-identifier; end state-machine;