------------------------------------------------------------------------------ -- Lexical Scanner (the thing that reads your source code) -- -- Also, the semantic stuff. -- -- -- -- Part of BUSH -- ------------------------------------------------------------------------------ -- -- -- Copyright (C) 2001-2005 Ken O. Burtch & FSF -- -- -- -- This is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- -- ware Foundation; either version 2, or (at your option) any later ver- -- -- sion. This is distributed in the hope that it will be useful, but WITH- -- -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- -- for more details. You should have received a copy of the GNU General -- -- Public License distributed with this; see file COPYING. If not, write -- -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, -- -- MA 02111-1307, USA. -- -- -- -- This is maintained at http://www.pegasoft.ca -- -- -- ------------------------------------------------------------------------------ -- CVS: $Header: /home/cvsroot/bush/src/scanner.adb,v 1.6 2005/08/31 15:10:45 ken Exp $ pragma warnings( off ); -- suppress Gnat-specific package warning with ada.command_line.environment; pragma warnings( on ); with system, ada.calendar, ada.text_io, ada.integer_text_io, ada.strings.fixed, ada.strings.unbounded.text_io, ada.characters.handling, bush_os, bush_os.tty, signal_flags, script_io, user_io, string_util, scanner_arrays, parser_os, parser_arrays, parser_files, parser_lock, parser_cmd, parser_cgi, parser_units, parser_cal, parser_db, parser_mysql, parser_numerics, parser_strings, parser_stats, parser_tio, parser_pen, parser_dirops; use ada.text_io, ada.integer_text_io, ada.command_line, ada.command_line.environment, ada.strings.fixed, ada.strings.unbounded, ada.strings.unbounded.text_io, ada.characters.handling, bush_os, bush_os.tty, signal_flags, script_io, user_io, string_util, scanner_arrays, parser_os, parser_arrays, parser_files, parser_lock, parser_cmd, parser_cgi, parser_units, parser_cal, parser_db, parser_mysql, parser_numerics, parser_strings, parser_stats, parser_tio, parser_pen, parser_dirops; pragma Optimize( time ); package body scanner is ----------------------------------------------------------------------------- -- Common constants ----------------------------------------------------------------------------- semicolon_string : constant unbounded_string := to_unbounded_string( ";" ); -- semi-colon, as an unbounded string verticalbar_string : constant unbounded_string := to_unbounded_string( "|" ); -- vertical bar, as an unbounded string ampersand_string : constant unbounded_string := to_unbounded_string( "&" ); -- ampersand, as an unbounded string redirectIn_string : constant unbounded_string := to_unbounded_string( "<" ); -- less than, as an unbounded string redirectOut_string : constant unbounded_string := to_unbounded_string( ">" ); -- greater than, as an unbounded string redirectAppend_string : constant unbounded_string := to_unbounded_string( ">>" ); -- double greater than, as an unbounded string redirectErrOut_string : constant unbounded_string := to_unbounded_string( "2>" ); -- '2' + greater than, as an unbounded string redirectErrAppend_string : constant unbounded_string := to_unbounded_string( "2>>" ); -- '2' + double greater than, as an unbounded string redirectErr2Out_string : constant unbounded_string := to_unbounded_string( "2>&1" ); -- '2' + greater than + ampersand and '1', as an unbounded string itself_string : constant unbounded_string := to_unbounded_string( "@" ); -- itself, as an unbounded string ----------------------------------------------------------------------------- -- HIGH ASCII CHARACTERS ----------------------------------------------------------------------------- immediate_word_delimiter : character; immediate_sql_word_delimiter : character; high_ascii_escape : character; eof_character : character; ----------------------------------------------------------------------------- -- Current Source File Location ----------------------------------------------------------------------------- sourceFileNo : natural := 0; sourceLineNoLo : natural := 0; sourceLineNoHi : natural := 0; ----------------------------------------------------------------------------- -- PUT TOKEN -- -- For debugging, show the current token, its value, type and properities. ----------------------------------------------------------------------------- procedure Put_Token is begin -- show name Put( "'" & toEscaped( identifiers( token ).name ) & "'" ); -- show parser status if done then Put( " [DONE]" ); end if; if done_sub then Put( " [SUB]" ); end if; if error_found then Put( " [ERR]" ); end if; if exit_block then Put( " [EXITBLK]" ); end if; if syntax_check then Put( " [SYN]" ); end if; New_Line; -- show details Put_Line( " Symbol Table pos =" & token'img ); Put_Line( " Instruction counter first/current/last =" & firstPos'img & "/" & cmdpos'img & "/" & lastPos'img ); Put_Line( " Token type = " & identifiers( identifiers( token ).kind ).name ); Put_Line( " Token value = '" & ToEscaped( identifiers( token ).value ) & "'" ); Put( " Token properties = " ); if identifiers( token ).import then Put( "import " ); end if; if identifiers( token ).export then Put( "export " ); end if; if identifiers( token ).inspect then Put( "inspected " ); end if; if identifiers( token ).list then Put( "array" ); end if; if identifiers( token ).field_of /= eof_t and identifiers( token ).class = subClass then Put( "field of record " ); begin put( identifiers( identifiers( token ).field_of ).name ); exception when others => put( "unknown" ); end; end if; if identifiers( token ).field_of /= eof_t and identifiers( token ).class = constClass then Put( "formal parameter of " ); begin put( identifiers( identifiers( token ).field_of ).name ); exception when others => put( "unknown" ); end; end if; if is_keyword( token ) then Put( "(reserved keyword) " ); end if; New_Line; end Put_Token; ----------------------------------------------------------------------------- -- PUT IDENTIFIER ATTRIBUTES -- -- Show an identifier's attributes (that is, it's type). Shared by -- put_identifier and put_all_identifiers. ----------------------------------------------------------------------------- procedure put_identifier_attributes( id : identifier ) is ident : declaration renames identifiers( id ); -- the identifier kind : declaration renames identifiers( ident.kind ); -- and its type begin if ident.import then put( "imported " ); end if; if ident.export then put( "exported " ); end if; if ident.volatile then put( "volatile " ); end if; if ident.inspect then put( "inspected " ); end if; -- Show the class (type, constant, etc.) case ident.class is when subClass => if ident.field_of = eof_t then put( "subtype of " ); end if; when typeClass => if not ident.list then put( "new type of " ); end if; when constClass => if ident.field_of = eof_t then put( "constant " ); end if; when funcClass => put( "built-in function " ); when procClass => put( "built-in procedure " ); when userProcClass => put( "function " ); when userFuncClass => put( "procedure " ); when others => put( "identifier of the type " ); end case; -- Limited type? if kind.limit then put( "limited " ); end if; -- Show the type -- Failsafe: shouldn't be keyword, but just in case. if ident.kind = keyword_t then if ident.class /= funcClass and ident.class /= procClass and ident.class /= userProcClass and ident.class /= userFuncClass then put( "keyword" ); end if; else if kind.name = "an anonymous array" then -- special handling since they're not easily visible put( "anonymous array" ); begin put( firstBound( arrayID( to_numeric( kind.value ) ) )'img ); exception when others => put( " unknown" ); end; put( " .." ); begin put( lastBound( arrayID( to_numeric( kind.value ) ) )'img ); exception when others => put( " unknown" ); end; put( " of " ); if ident.kind = eof_t then put( " unknown" ); else put( identifiers( identifiers( ident.kind ).kind ).name ); end if; elsif ident.field_of /= eof_t and ident.class = subClass then put( "field of record type " ); put( identifiers( ident.field_of ).name ); put( " of type " ); put( kind.name ); elsif ident.field_of /= eof_t and ident.class = constClass then put( "formal parameter of " ); put( identifiers( ident.field_of ).name ); put( " of type " ); put( kind.name ); elsif ident.list and (ident.class = typeClass or ident.class = subClass) then put( "array" ); begin put( firstBound( arrayID( to_numeric( ident.value ) ) )'img ); exception when others => put( " unknown" ); end; put( " .." ); begin put( lastBound( arrayID( to_numeric( ident.value ) ) )'img ); exception when others => put( " unknown" ); end; put( " of " ); if ident.kind = eof_t then put( " unknown" ); else put( kind.name ); end if; elsif ident.kind = root_record_t then -- base record type -- or identifiers( getBaseType( ident.kind ) ).kind = root_record_t then -- record type? put( "record with " & ident.value & " fields" ); else if ident.kind = eof_t then put( " unknown" ); else put( kind.name ); end if; end if; if identifiers( ident.kind ).kind = root_enumerated_t then put( " enumerated item" ); end if; end if; end put_identifier_attributes; ----------------------------------------------------------------------------- -- PUT IDENTIFIER -- -- Show an identifier's name, value, attributes and type on standard output. -- Used by env command and for debugging. ----------------------------------------------------------------------------- procedure Put_Identifier( id : identifier ) is ident : declaration renames identifiers( id ); -- the identifier kind : declaration renames identifiers( ident.kind ); -- and its type begin if ident.deleted then put_line( "This identifier has been deleted" ); else -- Is it a reserved keyword? then nothing else to report if id <= reserved_top-1 then put_line( " ( AdaScript reserved word )" ); return; end if; -- Enumerated? this is prettier than "new type of root_enumerated" if ident.kind = root_enumerated_t then put_line( " ( enumerated type )" ); return; elsif ident.kind = variable_t then put_line( " ( private type )" ); return; end if; -- Show the value of the variable if ident.kind /= keyword_t then put( ident.name ); if not ident.list and ident.kind /= root_record_t then -- if not ident.list and ident.kind /= root_record_t and identifiers( getBaseType( ident.kind ) ).kind /= root_record_t then put( " := " ); -- (should really used root type to determine quoting) if identifiers( getBaseType( ident.kind ) ).kind = root_record_t then put( "(" ); declare field_id : identifier; numFields : natural; begin numFields := natural( to_numeric( identifiers( getBaseType( ident.kind ) ).value ) ); for i in 1..numFields loop findField( id, i, field_id ); put( delete( identifiers( field_id ).name, 1, length( ident.name ) + 1 ) ); -- skip record name + '.' put( " =>" ); put( ToEscaped( identifiers( field_id ).value ) ); if i /= numFields then put( "," ); end if; end loop; end; put( ")" ); elsif ident.class = userProcClass then put( '"' ); put( ToEscaped( ident.value ) ); put( '"' ); elsif ident.class = userFuncClass then put( '"' ); put( ToEscaped( ident.value ) ); put( '"' ); elsif ident.kind = string_t then put( '"' ); put( ToEscaped( ident.value ) ); put( '"' ); elsif ident.kind = character_t then put( "'" ); put( ToEscaped( ident.value ) ); put( "'" ); elsif not ident.list then put( ToEscaped( ident.value ) ); end if; end if; end if; -- Show the attributes put( "; -- " ); put_identifier_attributes( id ); new_line; end if; end Put_Identifier; ----------------------------------------------------------------------------- -- PUT ALL IDENTIFIERS -- -- Show all the identifiers in the symbol table. Use a tabular layout. ----------------------------------------------------------------------------- procedure put_all_identifiers is maxNameWidth : natural := 20; maxValueWidth : natural := natural( displayInfo.col ) / 3; escapedValue : unbounded_string; round : natural; firstChar : natural; lastChar : natural; begin -- Show all the information in a table for i in 1..identifiers_top-1 loop if not identifiers( i ).deleted then round := 1; firstChar := 0; lastChar := 0; if identifiers( i ).kind /= keyword_t and not identifiers( i ).list then escapedValue := toEscaped( identifiers( i ).value ); else escapedValue := null_unbounded_string; end if; loop if round = 1 then if length( identifiers( i ).name ) > maxNameWidth then put( identifiers( i ).name ); else put( head( identifiers( i ).name, maxNameWidth ) ); end if; else put( head( " ", maxNameWidth ) ); end if; put( " | " ); if length( escapedValue ) > 0 then firstChar := (round-1)*maxValueWidth+1; lastChar := round*maxValueWidth; if lastChar > length( escapedValue ) then lastChar := length( escapedValue ); end if; put( head( slice( escapedValue, firstChar, lastChar ), maxValueWidth ) ); else put( to_unbounded_string( integer( maxValueWidth ) * " ") ); end if; put( " | " ); if round = 1 then put_identifier_attributes( i ); end if; new_line; exit when lastChar = length( escapedValue ); round := round+1; end loop; end if; end loop; end put_all_identifiers; ----------------------------------------------------------------------------- -- PUT TRACE -- -- TRUE if the identifier is a keyword. ----------------------------------------------------------------------------- procedure put_trace( msg : string ) is -- Display an escaped message to standard error in the format used when -- "trace true" is used. This does not check the tracing flag. begin put_line( standard_error, "=> (" & toEscaped( to_unbounded_string( msg ) ) & ")" ); end put_trace; ----------------------------------------------------------------------------- -- Error reporting ----------------------------------------------------------------------------- ----------------------------------------------------------------------------- -- ERR -- -- Raise a BUSH exception by creating an error message (in GCC or BUSH format) -- and setting the error_found flag. -- -- Only display the first error encounted. Set the token to end-of-file -- (eof_t) to stop further parsing and set the error_found flag to indicate -- that an error has been encountered to stop further messages. ----------------------------------------------------------------------------- procedure err( msg : string ) is cmdline : unbounded_string; firstpos : natural; lastpos : natural; lineStr : unbounded_string; firstposStr : unbounded_string; lineno : natural; outLine : unbounded_string; begin -- Already displayed one error or script is complete? -- Don't display any more if error_found or done then return; end if; -- Decode a copy of the command line to show the error. Also returns -- the current token position and the line number. getCommandLine( cmdline, firstpos, lastpos, lineno ); -- If in a script (that is, a non-interactive input mode) then -- show the location and traceback. Otherwise, don't bother. if inputMode /= interactive and inputMode /= breakout then -- Get the location information. If gcc option, strip the leading -- blanks form the location information. Use outLine to generate a full -- line of text because individual Put's are shown as individual lines -- in Apache's error logs for templates...a real mess. if gccOpt then -- gcc style? lineStr := to_unbounded_string( lineno'img ); -- remove leading if length( lineStr ) > 0 then -- space (if any) if element( lineStr, 1 ) = ' ' then delete( lineStr, 1, 1 ); end if; end if; firstposStr := to_unbounded_string( firstpos'img ); if length( firstposStr ) > 0 then -- here, too if element( firstposStr, 1 ) = ' ' then delete( firstposStr, 1, 1 ); end if; end if; outLine := scriptFilePath & ":" & lineStr & ":" & firstposStr & ":"; -- no traceback else -- If not gcc option, show the location and traceback outLine := scriptFilePath -- otherwise & ":" & lineno'img & ":" & firstpos'img & ": "; if blocks_top > blocks'first then -- in a block? for i in reverse blocks'first..blocks_top-1 loop if i /= blocks_top-1 then outLine := outLine & " in "; end if; outLine := outLine & ToEscaped( blocks( i ).blockName ); end loop; put( standard_error, outLine ); outLine := null_unbounded_string; else outLine := outLine & "script"; put( standard_error, outLine ); outLine := null_unbounded_string; end if; New_Line( standard_error ); end if; end if; -- If not gcc option, show the command line, underline the token and show -- the error. Otherwise, just add the error to the location information. -- Output only full lines to avoid messy Apache error logs. if not gccOpt then -- Normal: Draw The Current Line put_line( standard_error, toEscaped( cmdline ) ); -- current line -- Normal: Draw Error Pointer outLine := outLine & to_string( (firstPos-1) * " " ); -- indent --for i in 1..firstpos-1 loop -- move to token -- put( standard_error, " " ); --end loop; outLine := outLine & '^'; -- token start if lastpos > firstpos then -- multi chars? outLine := outLine & to_string( (lastpos-firstPos-1) * "-" ); --for i in 1..lastpos-firstpos-1 loop -- put( standard_error, "-" ); --end loop; outLine := outLine & '^'; -- token end end if; outLine := outLine & ' '; -- token start end if; outLine := outLine & msg; put_line( standard_error, outLine ); -- error msg -- If we're in a template and in debug mode, put the error message on the -- web page. if boolean( debugOpt ) and processingTemplate then put( "" ); put( "" ); put( "" ); put( "" ); put( "
" ); put( "" ); put( " BUSH " ); put( "" ); put( " " & outLine & " " ); put( "
" ); put( "
" ); new_line; end if; error_found := true; -- flag error token := eof_t; -- stop parser end err; ----------------------------------------------------------------------------- -- ERR PREVIOUS -- -- Same as err below, but don't hilight the current token because the error -- actually happened before it. Just mark start of current token. ----------------------------------------------------------------------------- procedure err_previous( msg : string ) is savepos : integer; begin savepos := lastpos; -- save current token's last character position lastpos := firstpos; -- token length is one to produce a single '^' err( msg ); -- show message pointing at first character lastpos := savepos; -- restore current token's last character position end err_previous; ----------------------------------------------------------------------------- -- Scope ----------------------------------------------------------------------------- ----------------------------------------------------------------------------- -- PUSH BLOCK -- -- Start a block statement or scope by pushing the current symbol table stack -- information on the blocks stack. It also records the current -- scanner position so execution can return to the top of the block. -- (Pulling the block later will restore the symbol table to its old -- contents.) If newScope is true, a new nested identifier scope -- is also started. ----------------------------------------------------------------------------- procedure pushBlock( newScope : boolean := false; newName : string := "" ) is begin if blocks_top = block'last then -- no room? raise block_table_overflow; -- raise err else -- start new scope by recording current declare block : blockDeclaration renames blocks( blocks_top ); -- new block begin block.startpos := scriptLineStart; -- current line block.identifiers_top := identifiers_top; -- last ident block.newScope := newScope; -- scope flag block.blockName := To_Unbounded_String( newName ); -- name if any markScanner( block.state ); -- scanner pos end; blocks_top := blocks_top + 1; -- push stack end if; end pushBlock; ----------------------------------------------------------------------------- -- PULL BLOCK -- -- Discards a block statement from the blocks stack, discarding all -- declared identifiers. (pullBlock treats all declarations in the -- block as local. This is not a problem due to the problem of skipping -- over conditional code, but once that issue is solved I'll need to -- come back to this one.) ----------------------------------------------------------------------------- procedure pullBlock is begin if blocks_top = block'first then -- no blocks? raise block_table_overflow; -- raise err else pullArrayBlock( blocks_top ); -- do any a's blocks_top := blocks_top - 1; -- pop stack identifiers_top := blocks( blocks_top ).identifiers_top; -- pop decl's end if; end pullBlock; ----------------------------------------------------------------------------- -- TOP OF BLOCK -- -- Return to the top of the current block statement as marked by pushBlock. ----------------------------------------------------------------------------- procedure topOfBlock is --posn : long_integer; --discard : aliased unbounded_string; --b : boolean; begin if blocks_top = blocks'first then -- in a block? err( "internal error: topOfBlock: not in a block" ); else resumeScanning( blocks( blocks_top-1 ).state ); -- move to top if inputMode /= interactive and inputMode /= breakout then -- in a script? scriptLineStart := blocks( blocks_top-1).startpos; -- current line if trace and not exit_block and not error_found then -- display put( standard_error, "=> " & '"' ); -- line if put( standard_error, toEscaped( getCommandLine ) ); -- tracing put_line( standard_error, "" & '"' ); end if; end if; end if; end topOfBlock; ----------------------------------------------------------------------------- -- IS LOCAL -- -- True if the identifier is local to the current block's scope ----------------------------------------------------------------------------- function isLocal( id : identifier ) return boolean is blockStart : identifier := 1; -- start from very first identifier nextBlock : block; hasLocalBlock : boolean; begin if blocks_top > blocks'first then -- in a nested block? nextBlock := blocks_top - 1; -- start with latest hasLocalBlock := true; -- assume we'll find one while not blocks( nextBlock ).newScope loop -- if not new scope if nextBlock = 1 then -- hit bottom? hasLocalBlock := false; -- no local blocks exit; -- and quit end if; nextBlock := nextBlock - 1; -- try previous block end loop; if hasLocalBlock then -- found local scope? blockStart := blocks( nextBlock ).identifiers_top; -- use it end if; end if; --put_line( "isLocal: testing identifier " & identifiers( id ).name ); --put_line( "isLocal: blocks top is " & Blocks_top'img ); --put_line( "isLocal: local scope starts at block " & nextBlock'img ); --put_line( "isLocal: " & id'img & " >= " & blockStart'img ); return id >= blockStart; end isLocal; ----------------------------------------------------------------------------- -- GET BLOCK NAME -- -- return the name of the given block ----------------------------------------------------------------------------- function getBlockName( b : block ) return unbounded_string is -- return the name of the given block begin if b >= blocks_top then return null_unbounded_string; end if; return blocks( b ).blockName; end getBlockName; ----------------------------------------------------------------------------- -- Scanner Housekeeping ----------------------------------------------------------------------------- ----------------------------------------------------------------------------- -- SHUTDOWN SCANNER -- -- Shutdown the scanner. Run shutdown for the various BUSH packages. Clear -- the symbol table and block (scope) table. ----------------------------------------------------------------------------- procedure shutdownScanner is begin ShutdownDirOps; ShutdownPen; ShutdownStats; ShutdownNumerics; ShutdownStrings; ShutdownCommandLine; ShutdownLockFiles; ShutdownTextIO; ShutdownCGI; ShutdownCalendar; ShutdownUnits; ShutdownFiles; ShutdownArrays; ShutdownMySQL; ShutdownDB; ShutdownBushOS; -- Clear the block and identifier symbol table, just in case the -- scanner should be started again later. identifiers_top := identifiers'first; -- no keywords blocks_top := block'first; -- no blocks end shutdownScanner; ----------------------------------------------------------------------------- -- RESET SCANNER -- -- A less harsh version of shutdownScanner / startScanner. -- Restart the scanner by resetting scanner variables to their startup -- values and reload the standard identifiers. (Although keywords -- can't be unset by the user, standard identifiers can. The user -- can unset "false", for example. We'll initialize all standard -- identifiers here to make sure they exist. However, this raises -- issues for nested scripts if resetScanner is used for that -- in the future purpose.) ----------------------------------------------------------------------------- procedure resetScanner is --procedure importEnvironment is -- -- Declare all Environment Variables. If --import_all is not used, -- -- still declare PATH, PWD, OLDPWD, HOME, TERM if they exist. -- path_key : unbounded_string := to_unbounded_string( "PATH=" ); -- pwd_key : unbounded_string := to_unbounded_string( "PWD=" ); -- oldpwd_key : unbounded_string := to_unbounded_string( "OLDPWD=" ); -- home_key : unbounded_string := to_unbounded_string( "HOME=" ); -- term_key : unbounded_string := to_unbounded_string( "TERM=" ); -- shell_key : unbounded_string := to_unbounded_string( "SHELL=" ); -- ev : unbounded_string; -- an env var --begin --put_line( "IMPORTING ENVIRONMENT" ); -- DEBUG --put( "ENV COUNT = " ); put_line( Environment_Count'img ); -- DEBUG -- for i in 1..Environment_Count loop -- ev := to_unbounded_string( Environment_Value( i ) ); --put( "ENV = " ); put_line( ev ); -- DEBUG -- if Head( ev, 5 ) = path_key then -- init_env_ident( Environment_Value( i ) ); -- elsif Head( ev, 4 ) = pwd_key then -- init_env_ident( Environment_Value( i ) ); -- elsif Head( ev, 7 ) = oldpwd_key then -- init_env_ident( Environment_Value( i ) ); -- elsif Head( ev, 5 ) = home_key then -- init_env_ident( Environment_Value( i ) ); -- elsif Head( ev, 5 ) = term_key then -- init_env_ident( Environment_Value( i ) ); -- identifiers( identifiers_top-1).export := true; --put_line( " TERM FOUND" ); -- DEBUG -- elsif Head( ev, 6 ) = shell_key then -- init_env_ident( Environment_Value( i ) ); -- elsif importOpt then -- init_env_ident( Environment_Value( i ) ); -- end if; -- end loop; --end importEnvironment; procedure importEnvironment is -- Declare all Environment Variables. If --import_all is not used, -- still declare PATH, PWD, OLDPWD, HOME, TERM if they exist. path_key : unbounded_string := to_unbounded_string( "PATH=" ); pwd_key : unbounded_string := to_unbounded_string( "PWD=" ); oldpwd_key : unbounded_string := to_unbounded_string( "OLDPWD=" ); home_key : unbounded_string := to_unbounded_string( "HOME=" ); term_key : unbounded_string := to_unbounded_string( "TERM=" ); shell_key : unbounded_string := to_unbounded_string( "SHELL=" ); ev : unbounded_string; -- an env var begin for i in 1..environmentList.Length( initialEnvironment ) loop environmentList.Find( initialEnvironment, i, ev ); if Head( ev, 5 ) = path_key then init_env_ident( to_string( ev ) ); elsif Head( ev, 4 ) = pwd_key then init_env_ident( to_string( ev ) ); elsif Head( ev, 7 ) = oldpwd_key then init_env_ident( to_string( ev ) ); elsif Head( ev, 5 ) = home_key then init_env_ident( to_string( ev ) ); elsif Head( ev, 5 ) = term_key then init_env_ident( to_string( ev ) ); identifiers( identifiers_top-1).export := true; elsif Head( ev, 6 ) = shell_key then init_env_ident( to_string( ev ) ); elsif importOpt then init_env_ident( to_string( ev ) ); end if; end loop; end importEnvironment; temp_id : identifier; -- unused id begin -- Restore the scanner to a startup state. Discard all identifiers -- on the symbol table except the reserved keywords. identifiers_top := reserved_top; -- keep keywords blocks_top := block'first; -- no blocks error_found := false; -- no error onlyAda95 := false; -- no Ada_95 depreciatedMsg := Null_Unbounded_String; -- nothing dep. exit_block := false; -- not exiting restriction_no_auto_declarations := false; -- auto decl OK syntax_check := false; -- not checking last_status := 0; -- status OK done := false; -- not quitting trace := false; -- not tracing cmdpos := 3; -- first char -- Predefined types declareIdent( variable_t, "root variable type", keyword_t ); declareIdent( uni_numeric_t, "universal_numeric", variable_t, typeClass ); declareIdent( uni_string_t, "universal_string", variable_t, typeClass ); declareIdent( root_enumerated_t, "root enumerated", variable_t, typeClass ); declareIdent( root_record_t, "root record", variable_t, typeClass ); declareIdent( command_t, "command", variable_t, typeClass ); declareIdent( file_type_t, "file_type", variable_t, typeClass ); identifiers( identifiers_top-1 ).limit := true; -- limited type declareIdent( socket_type_t, "socket_type", variable_t, typeClass ); identifiers( identifiers_top-1 ).limit := true; -- limited type declareIdent( universal_t, "universal_typeless", variable_t, typeClass ); declareIdent( integer_t, "integer", uni_numeric_t, typeClass ); declareIdent( natural_t, "natural", uni_numeric_t, typeClass ); declareIdent( positive_t, "positive", uni_numeric_t, typeClass ); declareIdent( short_short_integer_t, "short_short_integer", uni_numeric_t, typeClass ); declareIdent( short_integer_t, "short_integer", uni_numeric_t, typeClass ); declareIdent( long_integer_t, "long_integer", uni_numeric_t, typeClass ); declareIdent( long_long_integer_t, "long_long_integer", uni_numeric_t, typeClass ); declareIdent( character_t, "character", uni_string_t, typeClass ); declareIdent( float_t, "float", uni_numeric_t, typeClass ); declareIdent( short_float_t, "short_float", uni_numeric_t, typeClass ); declareIdent( long_float_t, "long_float", uni_numeric_t, typeClass ); declareIdent( boolean_t, "boolean", root_enumerated_t, typeClass ); declareIdent( string_t, "string", uni_string_t, typeClass ); declareIdent( duration_t, "duration", uni_numeric_t, typeClass ); declareIdent( file_mode_t, "file_mode", root_enumerated_t, typeClass ); declareIdent( unbounded_string_t, "unbounded_string", uni_string_t, typeClass ); declareIdent( complex_t, "complex", root_record_t, typeClass ); identifiers( complex_t ).value := to_unbounded_string( "2" ); declareIdent( complex_real_t, "complex.re", long_float_t, subClass ); identifiers( complex_real_t ).field_of := complex_t; identifiers( complex_real_t ).value := to_unbounded_string( "1" ); declareIdent( complex_imaginary_t, "complex.im", long_float_t, subClass ); identifiers( complex_imaginary_t ).field_of := complex_t; identifiers( complex_imaginary_t ).value := to_unbounded_string( "2" ); -- Literals declareIdent( backlit_t, "Backquote Literal", uni_string_t ); declareIdent( strlit_t, "String Literal", uni_string_t ); declareIdent( charlit_t, "Character Literal", uni_string_t ); declareIdent( number_t, "Numeric Literal", uni_numeric_t ); declareIdent( imm_delim_t, "", symbol_t ); immediate_word_delimiter := toHighASCII( imm_delim_t ); declareIdent( imm_sql_delim_t, "", symbol_t ); immediate_sql_word_delimiter := toHighASCII( imm_sql_delim_t ); declareIdent( word_t, "Word", uni_string_t ); declareIdent( sql_word_t, "SQL Word", uni_string_t ); declareIdent( char_escape_t, "", symbol_t ); high_ascii_escape := toHighASCII( char_escape_t ); -- VM Special Instructions -- declareKeyword( load_nr_t, "[Load Numeric Register]" ); -- declareKeyword( load_sr_t, "[Load String Register]" ); -- declareKeyword( load_ir_t, "[Load Index Register]" ); -- declareKeyword( fetch_nr_t, "[Load Numeric Register]" ); -- declareKeyword( fetch_sr_t, "[Load String Register]" ); -- declareKeyword( fetch_ir_t, "[Load Index Register]" ); -- Boolean enumerated declareStandardConstant( false_t, "false", boolean_t, "0" ); declareStandardConstant( true_t, "true", boolean_t, "1" ); -- Standard Package constants: ASCII declareStandardConstant( "ASCII.NUL", character_t, "" & ASCII.NUL ); declareStandardConstant( "ASCII.SOH", character_t, "" & ASCII.soh ); declareStandardConstant( "ASCII.STX", character_t, "" & ASCII.stx ); declareStandardConstant( "ASCII.ETX", character_t, "" & ASCII.etx ); declareStandardConstant( "ASCII.EOT", character_t, "" & ASCII.eot ); declareStandardConstant( "ASCII.ENQ", character_t, "" & ASCII.enq ); declareStandardConstant( "ASCII.ACK", character_t, "" & ASCII.ack ); declareStandardConstant( "ASCII.BEL", character_t, "" & ASCII.bel ); declareStandardConstant( "ASCII.BS", character_t, "" & ASCII.bs ); declareStandardConstant( "ASCII.HT", character_t, "" & ASCII.ht ); declareStandardConstant( "ASCII.LF", character_t, "" & ASCII.lf ); declareStandardConstant( "ASCII.VT", character_t, "" & ASCII.vt ); declareStandardConstant( "ASCII.FF", character_t, "" & ASCII.ff ); declareStandardConstant( "ASCII.CR", character_t, "" & ASCII.cr ); declareStandardConstant( "ASCII.SO", character_t, "" & ASCII.so ); declareStandardConstant( "ASCII.SI", character_t, "" & ASCII.si ); declareStandardConstant( "ASCII.DLE", character_t, "" & ASCII.dle ); declareStandardConstant( "ASCII.DC1", character_t, "" & ASCII.dc1 ); declareStandardConstant( "ASCII.DC2", character_t, "" & ASCII.dc2 ); declareStandardConstant( "ASCII.DC3", character_t, "" & ASCII.dc3 ); declareStandardConstant( "ASCII.DC4", character_t, "" & ASCII.dc4 ); declareStandardConstant( "ASCII.NAK", character_t, "" & ASCII.nak ); declareStandardConstant( "ASCII.SYN", character_t, "" & ASCII.syn ); declareStandardConstant( "ASCII.ETB", character_t, "" & ASCII.etb ); declareStandardConstant( "ASCII.CAN", character_t, "" & ASCII.can ); declareStandardConstant( "ASCII.EM", character_t, "" & ASCII.em ); declareStandardConstant( "ASCII.SUB", character_t, "" & ASCII.sub ); declareStandardConstant( "ASCII.ESC", character_t, "" & ASCII.esc ); declareStandardConstant( "ASCII.FS", character_t, "" & ASCII.fs ); declareStandardConstant( "ASCII.GS", character_t, "" & ASCII.gs ); declareStandardConstant( "ASCII.RS", character_t, "" & ASCII.rs ); declareStandardConstant( "ASCII.US", character_t, "" & ASCII.us ); declareStandardConstant( "ASCII.DEL", character_t, "" & ASCII.del ); declareStandardConstant( "ASCII.Exclam", character_t, "!" ); declareStandardConstant( "ASCII.Quotation", character_t, """" ); declareStandardConstant( "ASCII.Sharp", character_t, "#" ); declareStandardConstant( "ASCII.Dollar", character_t, "$" ); declareStandardConstant( "ASCII.Percent", character_t, "%" ); declareStandardConstant( "ASCII.Ampersand", character_t, "&" ); declareStandardConstant( "ASCII.Colon", character_t, ":" ); declareStandardConstant( "ASCII.Semicolon", character_t, ";" ); declareStandardConstant( "ASCII.Query", character_t, "?" ); declareStandardConstant( "ASCII.At_Sign", character_t, "@" ); declareStandardConstant( "ASCII.L_Bracket", character_t, "[" ); declareStandardConstant( "ASCII.Back_Slash",character_t, "\" ); declareStandardConstant( "ASCII.R_Bracket", character_t, "]" ); declareStandardConstant( "ASCII.Circumflex",character_t, "^" ); declareStandardConstant( "ASCII.Underline", character_t, "_" ); declareStandardConstant( "ASCII.Grave", character_t, "`" ); declareStandardConstant( "ASCII.L_Brace", character_t, "{" ); declareStandardConstant( "ASCII.Bar", character_t, "|" ); declareStandardConstant( "ASCII.R_Brace", character_t, "}" ); declareStandardConstant( "ASCII.Tilde", character_t, "~" ); declareStandardConstant( "ASCII.LC_A", character_t, "a" ); declareStandardConstant( "ASCII.LC_B", character_t, "b" ); declareStandardConstant( "ASCII.LC_C", character_t, "c" ); declareStandardConstant( "ASCII.LC_D", character_t, "d" ); declareStandardConstant( "ASCII.LC_E", character_t, "e" ); declareStandardConstant( "ASCII.LC_F", character_t, "f" ); declareStandardConstant( "ASCII.LC_G", character_t, "g" ); declareStandardConstant( "ASCII.LC_H", character_t, "h" ); declareStandardConstant( "ASCII.LC_I", character_t, "i" ); declareStandardConstant( "ASCII.LC_J", character_t, "j" ); declareStandardConstant( "ASCII.LC_K", character_t, "k" ); declareStandardConstant( "ASCII.LC_L", character_t, "l" ); declareStandardConstant( "ASCII.LC_M", character_t, "m" ); declareStandardConstant( "ASCII.LC_N", character_t, "n" ); declareStandardConstant( "ASCII.LC_O", character_t, "o" ); declareStandardConstant( "ASCII.LC_P", character_t, "p" ); declareStandardConstant( "ASCII.LC_Q", character_t, "q" ); declareStandardConstant( "ASCII.LC_R", character_t, "r" ); declareStandardConstant( "ASCII.LC_S", character_t, "s" ); declareStandardConstant( "ASCII.LC_T", character_t, "t" ); declareStandardConstant( "ASCII.LC_U", character_t, "u" ); declareStandardConstant( "ASCII.LC_V", character_t, "v" ); declareStandardConstant( "ASCII.LC_W", character_t, "w" ); declareStandardConstant( "ASCII.LC_X", character_t, "x" ); declareStandardConstant( "ASCII.LC_Y", character_t, "y" ); declareStandardConstant( "ASCII.LC_Z", character_t, "z" ); declareStandardConstant( "ASCII.UC_A", character_t, "A" ); declareStandardConstant( "ASCII.UC_B", character_t, "B" ); declareStandardConstant( "ASCII.UC_C", character_t, "C" ); declareStandardConstant( "ASCII.UC_D", character_t, "D" ); declareStandardConstant( "ASCII.UC_E", character_t, "E" ); declareStandardConstant( "ASCII.UC_F", character_t, "F" ); declareStandardConstant( "ASCII.UC_G", character_t, "G" ); declareStandardConstant( "ASCII.UC_H", character_t, "H" ); declareStandardConstant( "ASCII.UC_I", character_t, "I" ); declareStandardConstant( "ASCII.UC_J", character_t, "J" ); declareStandardConstant( "ASCII.UC_K", character_t, "K" ); declareStandardConstant( "ASCII.UC_L", character_t, "L" ); declareStandardConstant( "ASCII.UC_M", character_t, "M" ); declareStandardConstant( "ASCII.UC_N", character_t, "N" ); declareStandardConstant( "ASCII.UC_O", character_t, "O" ); declareStandardConstant( "ASCII.UC_P", character_t, "P" ); declareStandardConstant( "ASCII.UC_Q", character_t, "Q" ); declareStandardConstant( "ASCII.UC_R", character_t, "R" ); declareStandardConstant( "ASCII.UC_S", character_t, "S" ); declareStandardConstant( "ASCII.UC_T", character_t, "T" ); declareStandardConstant( "ASCII.UC_U", character_t, "U" ); declareStandardConstant( "ASCII.UC_V", character_t, "V" ); declareStandardConstant( "ASCII.UC_W", character_t, "W" ); declareStandardConstant( "ASCII.UC_X", character_t, "X" ); declareStandardConstant( "ASCII.UC_Y", character_t, "Y" ); declareStandardConstant( "ASCII.UC_Z", character_t, "Z" ); -- Latin_1 ------------------------ -- Control Characters -- ------------------------ declareStandardConstant( "Latin_1.NUL", character_t, "" & Character'Val (0)); declareStandardConstant( "Latin_1.SOH", character_t, "" & Character'Val (1)); declareStandardConstant( "Latin_1.STX", character_t, "" & Character'Val (2)); declareStandardConstant( "Latin_1.ETX", character_t, "" & Character'Val (3)); declareStandardConstant( "Latin_1.EOT", character_t, "" & Character'Val (4)); declareStandardConstant( "Latin_1.ENQ", character_t, "" & Character'Val (5)); declareStandardConstant( "Latin_1.ACK", character_t, "" & Character'Val (6)); declareStandardConstant( "Latin_1.BEL", character_t, "" & Character'Val (7)); declareStandardConstant( "Latin_1.BS", character_t, "" & Character'Val (8)); declareStandardConstant( "Latin_1.HT", character_t, "" & Character'Val (9)); declareStandardConstant( "Latin_1.LF", character_t, "" & Character'Val (10)); declareStandardConstant( "Latin_1.VT", character_t, "" & Character'Val (11)); declareStandardConstant( "Latin_1.FF", character_t, "" & Character'Val (12)); declareStandardConstant( "Latin_1.CR", character_t, "" & Character'Val (13)); declareStandardConstant( "Latin_1.SO", character_t, "" & Character'Val (14)); declareStandardConstant( "Latin_1.SI", character_t, "" & Character'Val (15)); declareStandardConstant( "Latin_1.DLE", character_t, "" & Character'Val (16)); declareStandardConstant( "Latin_1.DC1", character_t, "" & Character'Val (17)); declareStandardConstant( "Latin_1.DC2", character_t, "" & Character'Val (18)); declareStandardConstant( "Latin_1.DC3", character_t, "" & Character'Val (19)); declareStandardConstant( "Latin_1.DC4", character_t, "" & Character'Val (20)); declareStandardConstant( "Latin_1.NAK", character_t, "" & Character'Val (21)); declareStandardConstant( "Latin_1.SYN", character_t, "" & Character'Val (22)); declareStandardConstant( "Latin_1.ETB", character_t, "" & Character'Val (23)); declareStandardConstant( "Latin_1.CAN", character_t, "" & Character'Val (24)); declareStandardConstant( "Latin_1.EM", character_t, "" & Character'Val (25)); declareStandardConstant( "Latin_1.SUB", character_t, "" & Character'Val (26)); declareStandardConstant( "Latin_1.ESC", character_t, "" & Character'Val (27)); declareStandardConstant( "Latin_1.FS", character_t, "" & Character'Val (28)); declareStandardConstant( "Latin_1.GS", character_t, "" & Character'Val (29)); declareStandardConstant( "Latin_1.RS", character_t, "" & Character'Val (30)); declareStandardConstant( "Latin_1.US", character_t, "" & Character'Val (31)); -------------------------------- -- ISO 646 Graphic Characters -- -------------------------------- declareStandardConstant( "Latin_1.Space", character_t, " "); declareStandardConstant( "Latin_1.Exclamation", character_t, "!"); declareStandardConstant( "Latin_1.Quotation", character_t, """"); declareStandardConstant( "Latin_1.Number_Sign", character_t, "#"); declareStandardConstant( "Latin_1.Dollar_Sign", character_t, "$"); declareStandardConstant( "Latin_1.Percent_Sign", character_t, "%"); declareStandardConstant( "Latin_1.Ampersand", character_t, "&"); declareStandardConstant( "Latin_1.Apostrophe", character_t, "'"); declareStandardConstant( "Latin_1.Left_Parenthesis", character_t, "("); declareStandardConstant( "Latin_1.Right_Parenthesis", character_t, ")"); declareStandardConstant( "Latin_1.Asterisk", character_t, "*"); declareStandardConstant( "Latin_1.Plus_Sign", character_t, "+"); declareStandardConstant( "Latin_1.Comma", character_t, ","); declareStandardConstant( "Latin_1.Hyphen", character_t, "-"); declareStandardConstant( "Latin_1.Minus_Sign", character_t, "-"); declareStandardConstant( "Latin_1.Full_Stop", character_t, "."); declareStandardConstant( "Latin_1.Solidus", character_t, "/"); -- Decimal digits '0' though '9' are at positions 48 through 57 declareStandardConstant( "Latin_1.Colon", character_t, ":"); declareStandardConstant( "Latin_1.Semicolon", character_t, ";"); declareStandardConstant( "Latin_1.Less_Than_Sign", character_t, "<"); declareStandardConstant( "Latin_1.Equals_Sign", character_t, "="); declareStandardConstant( "Latin_1.Greater_Than_Sign", character_t, ">"); declareStandardConstant( "Latin_1.Question", character_t, "?"); declareStandardConstant( "Latin_1.Commercial_At", character_t, "@"); -- Letters 'A' through 'Z' are at positions 65 through 90 declareStandardConstant( "Latin_1.Left_Square_Bracket", character_t, "["); declareStandardConstant( "Latin_1.Reverse_Solidus", character_t, "\"); declareStandardConstant( "Latin_1.Right_Square_Bracket", character_t, "]"); declareStandardConstant( "Latin_1.Circumflex", character_t, "^"); declareStandardConstant( "Latin_1.Low_Line", character_t, "_"); declareStandardConstant( "Latin_1.Grave", character_t, "`"); declareStandardConstant( "Latin_1.LC_A", character_t, "a"); declareStandardConstant( "Latin_1.LC_B", character_t, "b"); declareStandardConstant( "Latin_1.LC_C", character_t, "c"); declareStandardConstant( "Latin_1.LC_D", character_t, "d"); declareStandardConstant( "Latin_1.LC_E", character_t, "e"); declareStandardConstant( "Latin_1.LC_F", character_t, "f"); declareStandardConstant( "Latin_1.LC_G", character_t, "g"); declareStandardConstant( "Latin_1.LC_H", character_t, "h"); declareStandardConstant( "Latin_1.LC_I", character_t, "i"); declareStandardConstant( "Latin_1.LC_J", character_t, "j"); declareStandardConstant( "Latin_1.LC_K", character_t, "k"); declareStandardConstant( "Latin_1.LC_L", character_t, "l"); declareStandardConstant( "Latin_1.LC_M", character_t, "m"); declareStandardConstant( "Latin_1.LC_N", character_t, "n"); declareStandardConstant( "Latin_1.LC_O", character_t, "o"); declareStandardConstant( "Latin_1.LC_P", character_t, "p"); declareStandardConstant( "Latin_1.LC_Q", character_t, "q"); declareStandardConstant( "Latin_1.LC_R", character_t, "r"); declareStandardConstant( "Latin_1.LC_S", character_t, "s"); declareStandardConstant( "Latin_1.LC_T", character_t, "t"); declareStandardConstant( "Latin_1.LC_U", character_t, "u"); declareStandardConstant( "Latin_1.LC_V", character_t, "v"); declareStandardConstant( "Latin_1.LC_W", character_t, "w"); declareStandardConstant( "Latin_1.LC_X", character_t, "x"); declareStandardConstant( "Latin_1.LC_Y", character_t, "y"); declareStandardConstant( "Latin_1.LC_Z", character_t, "z"); declareStandardConstant( "Latin_1.Left_Curly_Bracket", character_t, "{"); declareStandardConstant( "Latin_1.Vertical_Line", character_t, "|"); declareStandardConstant( "Latin_1.Right_Curly_Bracket", character_t, "}"); declareStandardConstant( "Latin_1.Tilde", character_t, "~"); declareStandardConstant( "Latin_1.DEL", character_t, "" & Character'Val (127)); --------------------------------- -- ISO 6429 Control Characters -- --------------------------------- declareStandardConstant( "Latin_1.IS4", character_t, "" & Character'Val (28)); declareStandardConstant( "Latin_1.IS3", character_t, "" & Character'Val (29)); declareStandardConstant( "Latin_1.IS2", character_t, "" & Character'Val (30)); declareStandardConstant( "Latin_1.IS1", character_t, "" & Character'Val (31)); declareStandardConstant( "Latin_1.Reserved_128", character_t, "" & Character'Val (128)); declareStandardConstant( "Latin_1.Reserved_129", character_t, "" & Character'Val (129)); declareStandardConstant( "Latin_1.BPH", character_t, "" & Character'Val (130)); declareStandardConstant( "Latin_1.NBH", character_t, "" & Character'Val (131)); declareStandardConstant( "Latin_1.Reserved_132", character_t, "" & Character'Val (132)); declareStandardConstant( "Latin_1.NEL", character_t, "" & Character'Val (133)); declareStandardConstant( "Latin_1.SSA", character_t, "" & Character'Val (134)); declareStandardConstant( "Latin_1.ESA", character_t, "" & Character'Val (135)); declareStandardConstant( "Latin_1.HTS", character_t, "" & Character'Val (136)); declareStandardConstant( "Latin_1.HTJ", character_t, "" & Character'Val (137)); declareStandardConstant( "Latin_1.VTS", character_t, "" & Character'Val (138)); declareStandardConstant( "Latin_1.PLD", character_t, "" & Character'Val (139)); declareStandardConstant( "Latin_1.PLU", character_t, "" & Character'Val (140)); declareStandardConstant( "Latin_1.RI", character_t, "" & Character'Val (141)); declareStandardConstant( "Latin_1.SS2", character_t, "" & Character'Val (142)); declareStandardConstant( "Latin_1.SS3", character_t, "" & Character'Val (143)); declareStandardConstant( "Latin_1.DCS", character_t, "" & Character'Val (144)); declareStandardConstant( "Latin_1.PU1", character_t, "" & Character'Val (145)); declareStandardConstant( "Latin_1.PU2", character_t, "" & Character'Val (146)); declareStandardConstant( "Latin_1.STS", character_t, "" & Character'Val (147)); declareStandardConstant( "Latin_1.CCH", character_t, "" & Character'Val (148)); declareStandardConstant( "Latin_1.MW", character_t, "" & Character'Val (149)); declareStandardConstant( "Latin_1.SPA", character_t, "" & Character'Val (150)); declareStandardConstant( "Latin_1.EPA", character_t, "" & Character'Val (151)); declareStandardConstant( "Latin_1.SOS", character_t, "" & Character'Val (152)); declareStandardConstant( "Latin_1.Reserved_153", character_t, "" & Character'Val (153)); declareStandardConstant( "Latin_1.SCI", character_t, "" & Character'Val (154)); declareStandardConstant( "Latin_1.CSI", character_t, "" & Character'Val (155)); declareStandardConstant( "Latin_1.ST", character_t, "" & Character'Val (156)); declareStandardConstant( "Latin_1.OSC", character_t, "" & Character'Val (157)); declareStandardConstant( "Latin_1.PM", character_t, "" & Character'Val (158)); declareStandardConstant( "Latin_1.APC", character_t, "" & Character'Val (159)); ------------------------------ -- Other Graphic Characters -- ------------------------------ -- Character positions 160 (16#A0#) .. 175 (16#AF#) declareStandardConstant( "Latin_1.No_Break_Space", character_t, "" & Character'Val (160)); declareStandardConstant( "Latin_1.NBSP", character_t, "" & Character'Val (160)); declareStandardConstant( "Latin_1.Inverted_Exclamation", character_t, "" & Character'Val (161)); declareStandardConstant( "Latin_1.Cent_Sign", character_t, "" & Character'Val (162)); declareStandardConstant( "Latin_1.Pound_Sign", character_t, "" & Character'Val (163)); declareStandardConstant( "Latin_1.Currency_Sign", character_t, "" & Character'Val (164)); declareStandardConstant( "Latin_1.Yen_Sign", character_t, "" & Character'Val (165)); declareStandardConstant( "Latin_1.Broken_Bar", character_t, "" & Character'Val (166)); declareStandardConstant( "Latin_1.Section_Sign", character_t, "" & Character'Val (167)); declareStandardConstant( "Latin_1.Diaeresis", character_t, "" & Character'Val (168)); declareStandardConstant( "Latin_1.Copyright_Sign", character_t, "" & Character'Val (169)); declareStandardConstant( "Latin_1.Feminine_Ordinal_Indicator", character_t, "" & Character'Val (170)); declareStandardConstant( "Latin_1.Left_Angle_Quotation", character_t, "" & Character'Val (171)); declareStandardConstant( "Latin_1.Not_Sign", character_t, "" & Character'Val (172)); declareStandardConstant( "Latin_1.Soft_Hyphen", character_t, "" & Character'Val (173)); declareStandardConstant( "Latin_1.Registered_Trade_Mark_Sign", character_t, "" & Character'Val (174)); declareStandardConstant( "Latin_1.Macron", character_t, "" & Character'Val (175)); -- Character positions 176 (16#B0#) .. 191 (16#BF#) declareStandardConstant( "Latin_1.Degree_Sign", character_t, "" & Character'Val (176)); declareStandardConstant( "Latin_1.Ring_Above", character_t, "" & "" & Character'Val (176)); declareStandardConstant( "Latin_1.Plus_Minus_Sign", character_t, "" & Character'Val (177)); declareStandardConstant( "Latin_1.Superscript_Two", character_t, "" & Character'Val (178)); declareStandardConstant( "Latin_1.Superscript_Three", character_t, "" & Character'Val (179)); declareStandardConstant( "Latin_1.Acute", character_t, "" & Character'Val (180)); declareStandardConstant( "Latin_1.Micro_Sign", character_t, "" & Character'Val (181)); declareStandardConstant( "Latin_1.Pilcrow_Sign", character_t, "" & Character'Val (182)); declareStandardConstant( "Latin_1.Paragraph_Sign", character_t, "" & Character'Val (182)); declareStandardConstant( "Latin_1.Middle_Dot", character_t, "" & Character'Val (183)); declareStandardConstant( "Latin_1.Cedilla", character_t, "" & Character'Val (184)); declareStandardConstant( "Latin_1.Superscript_One", character_t, "" & Character'Val (185)); declareStandardConstant( "Latin_1.Masculine_Ordinal_Indicator", character_t, "" & Character'Val (186)); declareStandardConstant( "Latin_1.Right_Angle_Quotation", character_t, "" & Character'Val (187)); declareStandardConstant( "Latin_1.Fraction_One_Quarter", character_t, "" & Character'Val (188)); declareStandardConstant( "Latin_1.Fraction_One_Half", character_t, "" & Character'Val (189)); declareStandardConstant( "Latin_1.Fraction_Three_Quarters", character_t, "" & Character'Val (190)); declareStandardConstant( "Latin_1.Inverted_Question", character_t, "" & Character'Val (191)); -- Character positions 192 (16#C0#) .. 207 (16#CF#) declareStandardConstant( "Latin_1.UC_A_Grave", character_t, "" & Character'Val (192)); declareStandardConstant( "Latin_1.UC_A_Acute", character_t, "" & Character'Val (193)); declareStandardConstant( "Latin_1.UC_A_Circumflex", character_t, "" & Character'Val (194)); declareStandardConstant( "Latin_1.UC_A_Tilde", character_t, "" & Character'Val (195)); declareStandardConstant( "Latin_1.UC_A_Diaeresis", character_t, "" & Character'Val (196)); declareStandardConstant( "Latin_1.UC_A_Ring", character_t, "" & Character'Val (197)); declareStandardConstant( "Latin_1.UC_AE_Diphthong", character_t, "" & Character'Val (198)); declareStandardConstant( "Latin_1.UC_C_Cedilla", character_t, "" & Character'Val (199)); declareStandardConstant( "Latin_1.UC_E_Grave", character_t, "" & Character'Val (200)); declareStandardConstant( "Latin_1.UC_E_Acute", character_t, "" & Character'Val (201)); declareStandardConstant( "Latin_1.UC_E_Circumflex", character_t, "" & Character'Val (202)); declareStandardConstant( "Latin_1.UC_E_Diaeresis", character_t, "" & Character'Val (203)); declareStandardConstant( "Latin_1.UC_I_Grave", character_t, "" & Character'Val (204)); declareStandardConstant( "Latin_1.UC_I_Acute", character_t, "" & Character'Val (205)); declareStandardConstant( "Latin_1.UC_I_Circumflex", character_t, "" & Character'Val (206)); declareStandardConstant( "Latin_1.UC_I_Diaeresis", character_t, "" & Character'Val (207)); -- Character positions 208 (16#D0#) .. 223 (16#DF#) declareStandardConstant( "Latin_1.UC_Icelandic_Eth", character_t, "" & Character'Val (208)); declareStandardConstant( "Latin_1.UC_N_Tilde", character_t, "" & Character'Val (209)); declareStandardConstant( "Latin_1.UC_O_Grave", character_t, "" & Character'Val (210)); declareStandardConstant( "Latin_1.UC_O_Acute", character_t, "" & Character'Val (211)); declareStandardConstant( "Latin_1.UC_O_Circumflex", character_t, "" & Character'Val (212)); declareStandardConstant( "Latin_1.UC_O_Tilde", character_t, "" & Character'Val (213)); declareStandardConstant( "Latin_1.UC_O_Diaeresis", character_t, "" & Character'Val (214)); declareStandardConstant( "Latin_1.Multiplication_Sign", character_t, "" & Character'Val (215)); declareStandardConstant( "Latin_1.UC_O_Oblique_Stroke", character_t, "" & Character'Val (216)); declareStandardConstant( "Latin_1.UC_U_Grave", character_t, "" & Character'Val (217)); declareStandardConstant( "Latin_1.UC_U_Acute", character_t, "" & Character'Val (218)); declareStandardConstant( "Latin_1.UC_U_Circumflex", character_t, "" & Character'Val (219)); declareStandardConstant( "Latin_1.UC_U_Diaeresis", character_t, "" & Character'Val (220)); declareStandardConstant( "Latin_1.UC_Y_Acute", character_t, "" & Character'Val (221)); declareStandardConstant( "Latin_1.UC_Icelandic_Thorn", character_t, "" & Character'Val (222)); declareStandardConstant( "Latin_1.LC_German_Sharp_S", character_t, "" & Character'Val (223)); -- Character positions 224 (16#E0#) .. 239 (16#EF#) declareStandardConstant( "Latin_1.LC_A_Grave", character_t, "" & Character'Val (224)); declareStandardConstant( "Latin_1.LC_A_Acute", character_t, "" & Character'Val (225)); declareStandardConstant( "Latin_1.LC_A_Circumflex", character_t, "" & Character'Val (226)); declareStandardConstant( "Latin_1.LC_A_Tilde", character_t, "" & Character'Val (227)); declareStandardConstant( "Latin_1.LC_A_Diaeresis", character_t, "" & Character'Val (228)); declareStandardConstant( "Latin_1.LC_A_Ring", character_t, "" & Character'Val (229)); declareStandardConstant( "Latin_1.LC_AE_Diphthong", character_t, "" & Character'Val (230)); declareStandardConstant( "Latin_1.LC_C_Cedilla", character_t, "" & Character'Val (231)); declareStandardConstant( "Latin_1.LC_E_Grave", character_t, "" & Character'Val (232)); declareStandardConstant( "Latin_1.LC_E_Acute", character_t, "" & Character'Val (233)); declareStandardConstant( "Latin_1.LC_E_Circumflex", character_t, "" & Character'Val (234)); declareStandardConstant( "Latin_1.LC_E_Diaeresis", character_t, "" & Character'Val (235)); declareStandardConstant( "Latin_1.LC_I_Grave", character_t, "" & Character'Val (236)); declareStandardConstant( "Latin_1.LC_I_Acute", character_t, "" & Character'Val (237)); declareStandardConstant( "Latin_1.LC_I_Circumflex", character_t, "" & Character'Val (238)); declareStandardConstant( "Latin_1.LC_I_Diaeresis", character_t, "" & Character'Val (239)); -- Character positions 240 (16#F0#) .. 255 (16#FF) declareStandardConstant( "Latin_1.LC_Icelandic_Eth", character_t, "" & Character'Val (240)); declareStandardConstant( "Latin_1.LC_N_Tilde", character_t, "" & Character'Val (241)); declareStandardConstant( "Latin_1.LC_O_Grave", character_t, "" & Character'Val (242)); declareStandardConstant( "Latin_1.LC_O_Acute", character_t, "" & Character'Val (243)); declareStandardConstant( "Latin_1.LC_O_Circumflex", character_t, "" & Character'Val (244)); declareStandardConstant( "Latin_1.LC_O_Tilde", character_t, "" & Character'Val (245)); declareStandardConstant( "Latin_1.LC_O_Diaeresis", character_t, "" & Character'Val (246)); declareStandardConstant( "Latin_1.Division_Sign", character_t, "" & Character'Val (247)); declareStandardConstant( "Latin_1.LC_O_Oblique_Stroke", character_t, "" & Character'Val (248)); declareStandardConstant( "Latin_1.LC_U_Grave", character_t, "" & Character'Val (249)); declareStandardConstant( "Latin_1.LC_U_Acute", character_t, "" & Character'Val (250)); declareStandardConstant( "Latin_1.LC_U_Circumflex", character_t, "" & Character'Val (251)); declareStandardConstant( "Latin_1.LC_U_Diaeresis", character_t, "" & Character'Val (252)); declareStandardConstant( "Latin_1.LC_Y_Acute", character_t, "" & Character'Val (253)); declareStandardConstant( "Latin_1.LC_Icelandic_Thorn", character_t, "" & Character'Val (254)); declareStandardConstant( "Latin_1.LC_Y_Diaeresis", character_t, "" & Character'Val (255)); -- System Package constants declareStandardConstant( "System.System_Name", uni_string_t, "SYSTEM_NAME_BUSH" ); declareStandardConstant( "System.Min_Int", uni_numeric_t, to_string( to_unbounded_string( long_float( integerOutputType'first+0.9 ) ) ) ); -- out minimum integer is the limit of a long_float's mantissa. should -- probably check that system.min_int isn't smaller, but Gnat gives bogus -- result on long_float( max_int ) if mantissa isn't big enough. declareStandardConstant( "System.Max_Int", uni_numeric_t, to_string( to_unbounded_string( maxInteger ) ) ); -- out maximum integer is the limit of a long_float's mantissa -- probably check that system.min_max isn't smaller declareStandardConstant( "System.Max_Binary_Modulus", uni_numeric_t, long_long_float'image( long_long_float( system.max_binary_modulus ) ) ); declareStandardConstant( "System.Max_Nonbinary_Modulus", uni_numeric_t, long_long_float'image( long_long_float( system.max_nonbinary_modulus ) ) ); declareStandardConstant( "System.Max_Base_Digits", uni_numeric_t, system.max_base_digits'img ); declareStandardConstant( "System.Max_Digits", uni_numeric_t, system.max_digits'img ); declareStandardConstant( "System.Max_Mantissa", uni_numeric_t, system.max_mantissa'img ); declareStandardConstant( "System.Fine_Delta", uni_numeric_t, system.fine_delta'img ); declareStandardConstant( "System.Tick", uni_numeric_t, system.tick'img ); declareStandardConstant( "System.Storage_Unit", uni_numeric_t, system.storage_unit'img ); declareStandardConstant( "System.Word_Size", uni_numeric_t, system.word_size'img ); declareStandardConstant( "System.Memory_Size", uni_numeric_t, long_float'image( long_float( system.memory_size ) ) ); -- NOTE: This was a universal integer but memory_size of 512 MB or larger gave an 'img error -- so it is now a long float. declareStandardConstant( "System.Default_Bit_Order", uni_string_t, system.default_bit_order'img ); declareStandardConstant( "System.Login_Shell", boolean_t, integer'image( boolean'pos(isLoginShell))(2) & "" ); declareStandardConstant( "System.Restricted_Shell", boolean_t, integer'image( commandLineOption'pos(rshOpt))(2) & "" ); -- most of the source_info must be filled in later by the parser declareStandardConstant( source_info_file_t, "source_info.file", uni_string_t, "" ); declareStandardConstant( source_info_line_t, "source_info.line", positive_t, "1" ); declareStandardConstant( source_info_src_loc_t, "source_info.source_location", uni_string_t, "1:" ); declareStandardConstant( source_info_enc_ent_t, "source_info.enclosing_entity", uni_string_t, "" ); declareStandardConstant( source_info_script_size_t, "source_info.script_size", natural_t, "0" ); -- will be filled in later when script is loaded declareFunction( source_info_symbol_table_size_t, "source_info.symbol_table_size" ); -- startup built-in packages from other modules StartupTextIO; StartupPen; StartupStats; StartupNumerics; StartupStrings; StartupCommandLine; StartupLockFiles; StartupCGI; StartupCalendar; StartupUnits; StartupFiles; StartupArrays; StartupMySQL; StartupDB; StartupBushOS; StartupCalendar; StartupUnits; StartupDirOps; declareProcedure( sound_play_t, "sound.play" ); declareProcedure( sound_playcd_t, "sound.playcd" ); declareProcedure( sound_stopcd_t, "sound.stopcd" ); declareProcedure( sound_mute_t, "sound.mute" ); declareProcedure( sound_unmute_t, "sound.unmute" ); -- StartupFiles; -- StartupArrays; -- StartupDB; -- StartupBushOS; -- Declare all Environment Variables -- -- If --import_all is not used, still declare PATH, PWD, OLDPWD, HOME, -- TERM, SHELL -- if they exist. importEnvironment; -- Declare any standard shell variables that are not -- yet declared. PATH, PWD, OLDPWD, HOME are necessary for BUSH -- to function: if they weren't imported, declare them locally. findIdent( to_unbounded_string( "PATH" ), temp_id ); -- PATH if temp_id = eof_t then -- still missing? declareIdent( temp_id, "PATH", uni_string_t ); -- declare it end if; if rshOpt then -- restricted sh? identifiers( temp_id).class := constClass; -- PATH is a end if; -- constant findIdent( to_unbounded_string( "PWD" ), temp_id ); -- PWD if temp_id = eof_t then -- missing? declareIdent( temp_id, "PWD", uni_string_t ); -- declare it declare -- lookup current working directory -- perhaps a bush_os.pwd package is in order to share -- this with scanner and builtins? buffer : string( 1..4096 ); begin C_reset_errno; getcwd( buffer, buffer'length ); if C_errno = 0 then identifiers( temp_id ).value := to_unbounded_string( buffer( 1..index( buffer, ASCII.NUL & "" ) - 1 ) ) ; end if; end; end if; findIdent( to_unbounded_string( "OLDPWD" ), temp_id ); -- OLDPWD if temp_id = eof_t then -- missing? declareIdent( temp_id, "OLDPWD", uni_string_t ); -- declare it end if; findIdent( to_unbounded_string( "HOME" ), temp_id ); -- HOME if temp_id = eof_t then -- missing? declareIdent( temp_id, "HOME", uni_string_t ); -- declare it end if; findIdent( to_unbounded_string( "SHELL" ), temp_id ); -- SHELL if temp_id = eof_t then -- missing? declareIdent( temp_id, "SHELL", uni_string_t ); -- declare it identifiers( temp_id ).export := true; -- not exactly right, but works... if C_is_executable_file( "/usr/local/bin/bush" & ASCII.NUL ) then identifiers( temp_id ).value := to_unbounded_string( "/usr/local/bin/bush" ); elsif C_is_executable_file( "/bin/bush" & ASCII.NUL ) then identifiers( temp_id ).value := to_unbounded_string( "/bin/bush" ); end if; end if; findIdent( to_unbounded_string( "TERM" ), temp_id ); -- TERM if temp_id = eof_t then -- missing? declareIdent( temp_id, "TERM", uni_string_t ); -- declare it identifiers( temp_id ).export := true; -- default xterm identifiers( temp_id ).value := to_unbounded_string( "xterm" ); else -- xterm emulation? then change the window name during interactive -- sessions terminalWindowNaming := head( identifiers( temp_id ).value, 5 ) = "xterm" or identifiers( temp_id ).value = "linux"; end if; end resetScanner; ----------------------------------------------------------------------------- -- START SCANNER -- -- Set up symbol table, declaring all keywords, constants, and environment -- variables. This should be executed once when BUSH is started, or to -- restart the scanner after it has been shut down. Run resetScanner. ----------------------------------------------------------------------------- procedure startScanner is procedure saveInitialEnvironment is -- Save a copy of the environment at startup. ev : unbounded_string; -- an env var begin for i in 1..Environment_Count loop ev := to_unbounded_string( Environment_Value( i ) ); environmentList.Queue( initialEnvironment, ev ); end loop; end saveInitialEnvironment; begin maxInteger := long_float( integerOutputType'last-0.9 ); saveInitialEnvironment; -- save a copy of the O/S environment clearHistory; -- The first symbol is the keyword "type", the type of -- all keywords declareKeyword( keyword_t, "_keyword" ); -- Ada 95 keywords declareKeyword( abort_t, "abort" ); declareKeyword( abs_t, "abs" ); declareKeyword( abstract_t, "abstract" ); declareKeyword( accept_t, "accept" ); declareKeyword( access_t, "access" ); declareKeyword( aliased_t, "aliased" ); declareKeyword( all_t, "all" ); declareKeyword( and_t, "and" ); declareKeyword( array_t, "array" ); declareKeyword( at_t, "at" ); declareKeyword( begin_t, "begin" ); declareKeyword( body_t, "body" ); declareKeyword( case_t, "case" ); declareKeyword( constant_t, "constant" ); declareKeyword( declare_t, "declare" ); declareKeyword( delay_t, "delay" ); declareKeyword( delta_t, "delta" ); declareKeyword( digits_t, "digits" ); declareKeyword( do_t, "do" ); declareKeyword( else_t, "else" ); declareKeyword( elsif_t, "elsif" ); declareKeyword( end_t, "end" ); declareKeyword( entry_t, "entry" ); declareKeyword( exception_t, "exception" ); declareKeyword( exit_t, "exit" ); declareKeyword( for_t, "for" ); declareKeyword( function_t, "function" ); declareKeyword( generic_t, "generic" ); declareKeyword( goto_t, "goto" ); declareKeyword( if_t, "if" ); declareKeyword( in_t, "in" ); declareKeyword( interface_t, "interface" ); declareKeyword( is_t, "is" ); declareKeyword( limited_t, "limited" ); declareKeyword( loop_t, "loop" ); declareKeyword( mod_t, "mod" ); declareKeyword( new_t, "new" ); declareKeyword( not_t, "not" ); declareKeyword( null_t, "null" ); declareKeyword( of_t, "of" ); declareKeyword( or_t, "or" ); declareKeyword( others_t, "others" ); declareKeyword( out_t, "out" ); declareKeyword( package_t, "package" ); declareKeyword( pragma_t, "pragma" ); declareKeyword( private_t, "private" ); declareKeyword( procedure_t, "procedure" ); declareKeyword( protected_t, "protected" ); declareKeyword( raise_t, "raise" ); declareKeyword( range_t, "range" ); declareKeyword( record_t, "record" ); declareKeyword( rem_t, "rem" ); declareKeyword( renames_t, "renames" ); declareKeyword( requeue_t, "requeue" ); declareKeyword( return_t, "return" ); declareKeyword( reverse_t, "reverse" ); declareKeyword( select_t, "select" ); declareKeyword( separate_t, "separate" ); declareKeyword( subtype_t, "subtype" ); declareKeyword( tagged_t, "tagged" ); declareKeyword( task_t, "task" ); declareKeyword( terminate_t, "terminate" ); declareKeyword( then_t, "then" ); declareKeyword( type_t, "type" ); declareKeyword( until_t, "until" ); declareKeyword( use_t, "use" ); declareKeyword( when_t, "when" ); declareKeyword( while_t, "while" ); declareKeyword( with_t, "with" ); declareKeyword( xor_t, "xor" ); keywords_top := identifiers_top; -- A punctuation symbol declareKeyword( symbol_t, "Punctuation Symbol" ); -- The end of file token (must be declared early, used in declarations) eof_character := toHighASCII( identifiers_top ); declareKeyword( eof_t, "End of File" ); -- No last output last_output_type := eof_t; -- Built-in Bourne shell-type commands declareProcedure( env_t, "env" ); declareProcedure( typeset_t, "typeset" ); declareProcedure( unset_t, "unset" ); declareProcedure( trace_t, "trace" ); declareProcedure( help_t, "help" ); declareProcedure( clear_t, "clear" ); declareProcedure( jobs_t, "jobs" ); declareProcedure( logout_t, "logout" ); declareProcedure( pwd_t, "pwd" ); declareProcedure( cd_t, "cd" ); declareProcedure( history_t, "history" ); declareProcedure( wait_t, "wait" ); declareProcedure( step_t, "step" ); -- declareKeyword( template_t, "template" ); -- SQL declareKeyword( alter_t, "alter" ); declareKeyword( insert_t, "insert" ); --declareKeywrod( select_t, "select" ); declareKeyword( update_t, "update" ); declareKeyword( delete_t, "delete" ); -- remember stack top for last keyword reserved_top := identifiers_top; if reserved_top > 255 then put_line( standard_error, "Too many reserved words (limit 128)" ); raise PROGRAM_ERROR; end if; -- Initialize all scanner variables and declare all other -- standard identifiers. resetScanner; end startScanner; -- Types function inEnvironment( id : identifier ) return boolean is -- Return true if variable is in O/S environment key : unbounded_string; -- "var=" ev : unbounded_string; -- an env var exists : boolean := false; begin key := identifiers( id ).name & "="; -- look for this for i in 1..Environment_Count loop -- all env vars ev := to_unbounded_string( Environment_Value( i ) ); -- get next one if Head( ev, length( key ) ) = key then -- match? exists := true; -- found exit; -- we're done end if; end loop; return exists; end inEnvironment; procedure refreshVolatile( id : identifier ) is -- Look up a volatile variable and refresh it -- (Volatile variables are only environment variables.) key : unbounded_string; -- "var=" ev : unbounded_string; -- an env var refreshed : boolean := false; begin key := identifiers( id ).name & "="; -- look for this for i in 1..Environment_Count loop -- all env vars ev := to_unbounded_string( Environment_Value( i ) ); -- get next one if Head( ev, length( key ) ) = key then -- match? identifiers( id ).value := -- get value Tail( ev, length( ev ) - length( key ) ); -- and assign refreshed := true; exit; -- we're done end if; end loop; -- fall back to initial environment if not refreshed then for i in 1..environmentList.Length( initialEnvironment ) loop environmentList.Find( initialEnvironment, i, ev ); if Head( ev, length( key ) ) = key then -- match? identifiers( id ).value := -- get value Tail( ev, length( ev ) - length( key ) ); -- and assign refreshed := true; exit; -- we're done end if; end loop; end if; if not refreshed then err( "unable to find variable " & to_string( identifiers( id ).name ) & " in O/S enviroment" ); end if; end refreshVolatile; function getUniType( original : identifier ) return identifier is -- Dereference identifier until we find the universal type that this type -- is based on. quit if a circular relationship is suspected. An -- error will be reported to the user and universal_typeless is -- returned. temp_id : identifier; count : natural := 0; begin -- safety check: eof_t is often returned on an error in the parser if identifiers( original ).kind = eof_t then err( "type expected" ); return universal_t; -- new identifiers declared by the scanner have no type yet elsif identifiers( original ).kind = new_t then return new_t; -- safety check: keywords have no type elsif identifiers( original ).kind = keyword_t then err( "type expected, not a keyword" ); return universal_t; end if; -- "Dereference" types/subtypes, moving up type hierarchy, -- until a type derived from variable_t is found. This -- will be the universal type the type is derived from. -- If there are more than 100 dereferences, assume this -- is a circular relationship (this should only occur in -- an internal error in BUSH). temp_id := original; while identifiers( temp_id ).kind /= variable_t loop temp_id := identifiers( temp_id ).kind; count := count + 1; if count >= 100 then err( "circular type relationship" ); exit; end if; end loop; return temp_id; end getUniType; function getBaseType( original : identifier ) return identifier is -- Dereference original type until we find the original, parent root type -- (i.e. for types declared with "subtype", move up the type -- hierarchy to the first parent that is not a subtype). Quit -- if a circular relationship is suspected. On an error, an error -- message is displayed and universal_typeless is returned. temp_id : identifier; count : natural := 0; begin -- safety check: eof_t is often returned on an error in the parser if identifiers( original ).kind = eof_t then err( "type expected" ); return universal_t; -- new identifiers declared by the scanner have no type yet elsif identifiers( original ).kind = new_t then return new_t; -- safety check: keywords have no type elsif identifiers( original ).kind = keyword_t then err( "type expected, not a keyword" ); return universal_t; end if; -- "Dereference" subtypes, moving up type hierarchy, -- until a non-subtype (that is, the base type) is found. -- If there are more than 100 dereferences, assume this -- is a circular relationship (this should only occur in -- an internal error in BUSH). temp_id := original; while identifiers( temp_id ).class = subClass loop temp_id := identifiers( temp_id ).kind; count := count + 1; if count >= 100 then err( "circular type relationship" ); exit; end if; end loop; return temp_id; end getBaseType; function getClassName( class : anIdentifierClass ) return string is -- Look up an identifier class and return a string describing the -- class (that is, "constant", "type", "variable", etc.) Used -- by class_ok when reporting errors. begin if class = constClass then -- constClass? return "constant"; -- "constant" elsif class = typeClass then -- typeClass? return "type"; -- "type" elsif class = subClass then -- subClass? return "subtype"; -- "subtype" elsif class = funcClass then -- funcClass? return "function"; -- "function" elsif class = userFuncClass then -- funcClass? return "function"; -- "function" elsif class = procClass then -- procClass? return "procedure"; -- "procedure" elsif class = userProcClass then -- userProcClass? return "procedure"; -- "procedure" elsif class = taskClass then -- subClass? return "task"; -- "task" else -- otherwise return "variable"; -- "variable" end if; end getClassName; function class_ok( id : identifier; class : anIdentifierClass ) return boolean is -- Check if identifier matches a certain class. If the identifier is -- of another class, display an error message and return false. begin if identifiers( id ).class /= class then if id = eof_t then err( "internal error: eof given to class_ok(1)" ); elsif id < reserved_top then err_previous( "a " & bold( "keyword" ) & " is not a " & getClassName( class ) ); else err_previous( bold( to_string( identifiers( id ).name ) ) & " is a " & getClassName( identifiers( id ).class ) & ", not a " & getClassName( class ) ); end if; return false; end if; return true; end class_ok; function class_ok( id : identifier; c1,c2 : anIdentifierClass ) return boolean is -- Check if identifier matches one of two classes. If the identifier is of -- another class, display an error message and return false. begin if identifiers( id ).class /= c1 and identifiers( id ).class /= c2 then if id = eof_t then err( "internal error: eof given to class_ok(2)" ); elsif id < reserved_top then err_previous( "a " & bold( "keyword" ) & " is not a " & getClassName( c1 ) & " or a " & getClassName( c2 ) ); else err_previous( bold( to_string( identifiers( id ).name ) ) & " is a " & getClassName( identifiers( id ).class ) & ", not a " & getClassName( c1 ) & " or a " & getClassName( c2 ) ); end if; return false; end if; return true; end class_ok; function class_ok( id : identifier; c1,c2,c3 : anIdentifierClass ) return boolean is -- Check if identifier matches one of two classes. If the identifier is of -- another class, display an error message and return false. begin if identifiers( id ).class /= c1 and identifiers( id ).class /= c2 and identifiers( id ).class /= c3 then if id = eof_t then err( "internal error: eof given to class_ok(2)" ); elsif id < reserved_top then err_previous( "a " & bold( "keyword" ) & " is not a " & getClassName( c1 ) & ", " & getClassName( c2 ) & " or a " & getClassName( c3 ) ); else err_previous( bold( to_string( identifiers( id ).name ) ) & " is a " & getClassName( identifiers( id ).class ) & ", not a " & getClassName( c1 ) & ", " & getClassName( c2 ) & " or a " & getClassName( c3 ) ); end if; return false; end if; return true; end class_ok; function uniTypesOk( leftType, rightType : identifier ) return boolean is -- Check that the two types are extended from a common universal type. -- If the types differ, report an error message and return false. effectiveLeftType : identifier; effectiveRightType : identifier; msg : unbounded_string; begin -- Basic checks: we're expecting a type, subtype or array type. Unversal -- typeless is always a match. if not class_ok( leftType, typeClass, subClass ) then return false; elsif not class_ok( rightType, typeClass, subClass ) then return false; elsif leftType = variable_t or rightType = variable_t then return true; end if; -- Determine the root type for the variables. If either is -- extended from universal typeless, it's a match. effectiveLeftType := getUniType( leftType ); effectiveRightType := getUniType( rightType ); if effectiveLeftType = universal_t or effectiveRightType = universal_t then return true; end if; -- If the types don't match, display an error message showing -- the type as well as the root type. if identifiers( leftType ).list and not identifiers( rightType ).list then msg := "type " & bold( to_string( identifiers( leftType ).name ) ) & "is an array but type " & to_unbounded_string( bold( to_string( identifiers( rightType ).name ) ) & " is not an array" ); elsif not identifiers( leftType ).list and identifiers( rightType ).list then msg := "type " & bold( to_string( identifiers( leftType ).name ) ) & "is not an array but type " & to_unbounded_string( bold( to_string( identifiers( rightType ).name ) ) & " is an array" ); elsif effectiveLeftType /= effectiveRightType then msg := to_unbounded_string( "type " & bold( to_string( identifiers( leftType ).name ) ) ); if effectiveLeftType = root_enumerated_t then msg := msg & " (an enumerated type)"; elsif identifiers( leftType ).kind /= variable_t then msg := msg & " (" & AorAN( identifiers( effectiveLeftType ).name ) & ")"; end if; msg := msg & " is inherently different from " & bold( to_string( AorAN( identifiers( rightType ).name ) ) ); if effectiveRightType = root_enumerated_t then msg := msg & " (an enumerated type)"; elsif identifiers( rightType ).kind /= variable_t then msg := msg & " (" & AorAN( identifiers( effectiveRightType ).name ) & ")"; end if; err_previous( to_string( msg ) ); return false; end if; return true; end uniTypesOk; function baseTypesOk( leftType, rightType : identifier ) return boolean is -- Check that the two types are extended from a common base type. -- If the types differ, report an error message and return false. effectiveLeftType : identifier; effectiveRightType : identifier; begin -- Basic checks: if the root types don't match, then the base types -- won't. If either type is universal typeless, they automatically -- match. if not uniTypesOk( leftType, rightType ) then return false; end if; if leftType = universal_t or rightType = universal_t then return true; end if; effectiveLeftType := getBaseType( leftType ); effectiveRightType := getBaseType( rightType ); -- Universal type cases: Universal numeric or universal string will -- match depending on the root type of the second type. if effectiveLeftType = uni_numeric_t and then getUniType( rightType ) = uni_numeric_t then return true; end if; if effectiveLeftType = uni_string_t and then getUniType( rightType ) = uni_string_t then return true; end if; if effectiveRightType = uni_numeric_t and then getUniType( leftType ) = uni_numeric_t then return true; end if; if effectiveRightType = uni_string_t and then getUniType( leftType ) = uni_string_t then return true; end if; -- Otherwise, the types must be identical. if effectiveLeftType /= effectiveRightType then err_previous( "type " & bold( to_string( identifiers( leftType ).name) ) & " is not compatible with type " & bold( to_string( identifiers( rightType ).name ) ) ); return false; end if; return true; end baseTypesOk; function intTypesOk( givenType, desiredType : identifier ) return boolean is -- Like baseTypesOk, check that the two integer subtypes are -- compatible. This is placed in a separate function for performance -- and because, unlike baseTypesOK, the order of the parameters is -- important. If the types are not compatible, reports an error -- message and return false. effectiveGivenType : identifier; effectiveDesiredType : identifier; begin effectiveGivenType := getBaseType( givenType ); -- dereference effectiveDesiredType := getBaseType( desiredType ); -- subtypes if effectiveDesiredType = integer_t then -- looking for if effectiveGivenType = integer_t then -- an integer? return true; -- integer, elsif effectiveGivenType = natural_t then -- natural, return true; -- positive elsif effectiveGivenType = positive_t then -- are good return true; end if; elsif effectiveDesiredType = natural_t then -- natural? if effectiveGivenType = natural_t then -- natural, return true; -- positive elsif effectiveGivenType = positive_t then -- are good return true; end if; end if; -- positive must be an exact match so it can fall through to -- baseTypesOK return baseTypesOK( givenType, desiredType ); -- fall back end intTypesOk; ----------------------------------------------------------------------------- -- Scanning ----------------------------------------------------------------------------- ----------------------------------------------------------------------------- -- getSourceFileName -- -- Determine the current source file as stored against the byte code line. ----------------------------------------------------------------------------- function getSourceFileName return unbounded_string is line_firstpos : natural; -- start of compiled line sourceFile : aSourceFile; sourceNumber : sourceFilesList.aListIndex; begin line_firstpos := cmdpos; -- start at current pos if line_firstpos > 1 then -- sane value? line_firstpos := line_firstpos - 1; -- search for previous while script( line_firstpos ) /= ASCII.NUL loop -- ASCII.NUL line_firstpos := line_firstpos - 1; end loop; end if; line_firstpos := line_firstpos + 1; -- skip NUL sourceNumber := sourceFilesList.aListIndex( character'pos( script( line_firstpos ) ) ); sourceFilesList.Find( sourceFiles, sourceNumber, sourceFile ); return sourceFile.Name; end getSourceFileName; ----------------------------------------------------------------------------- -- getLineNo -- -- Determine the current line number as stored against the byte code line. ----------------------------------------------------------------------------- function getLineNo return natural is line_firstpos : natural; -- start of compiled line line_number : natural; begin line_firstpos := cmdpos; -- start at current pos if line_firstpos > 1 then -- sane value? line_firstpos := line_firstpos - 1; -- search for previous while script( line_firstpos ) /= ASCII.NUL loop -- ASCII.NUL line_firstpos := line_firstpos - 1; end loop; end if; line_firstpos := line_firstpos + 1; -- skip NUL line_number := ( character'pos( script( line_firstpos + 1 ) ) -1 ) + ( character'pos( script( line_firstpos + 2 ) ) -1 ) * 256; return line_number; end getLineNo; ----------------------------------------------------------------------------- -- GET COMMAND LINE -- -- Return current command line, decoded into normal text, but not including -- the LF separating lines. token_firstpos and token_lastpos is the location -- of the current token on the expanded line. ----------------------------------------------------------------------------- procedure getCommandLine ( cmdline : out unbounded_string; token_firstpos, token_lastpos, line_number : out natural ) is line_firstpos : natural; -- start of compiled line line_lastpos : natural; -- end of compiled line indent : natural; len : natural; is_escaping : boolean; begin -- Script unexpectedly null? Print a message an let an exception be raised -- later. if script = null then put_line( standard_error, "internal_error: getCommandLine: script is null" ); return; end if; -- cmdpos has an insane value? Print a message and let an exception be -- raised later. if cmdpos > script'length then put_line( standard_error, "internal_error: getCommandLine: cmdpos " & cmdpos'img & " is greater than length of script " & script'length'img ); return; end if; -- Prepare to find the start and end of the command line line_firstpos := cmdpos; -- start at current pos line_lastpos := cmdpos; -- start at current pos is_escaping := false; -- not escaping -- find beginning and end of command line -- (as it appears in the byte code) if line_firstpos > 1 then -- sane value? line_firstpos := line_firstpos - 1; -- search for previous while script( line_firstpos ) /= ASCII.NUL loop -- ASCII.NUL line_firstpos := line_firstpos - 1; end loop; end if; if line_lastpos <= script'length then -- sane value? while script( line_lastpos ) /= ASCII.NUL loop -- look for next line_lastpos := line_lastpos + 1; -- ASCII.NUL end loop; -- or this one if end if; -- on one if line_lastpos - line_firstpos <= 2 then -- a blank line? cmdLine := null_unbounded_string; -- return null string token_firstpos := 1; token_lastpos := 1; return; end if; -- skip ASCII.NUL at end of last line and the information at the start -- of the current line. Extract the line number and indent. line_firstpos := line_firstpos + 1; -- skip NUL line_number := ( character'pos( script( line_firstpos + 1 ) ) -1 ) + ( character'pos( script( line_firstpos + 2 ) ) - 1 ) * 256; line_firstpos := line_firstpos + 3; -- skip line number info line_lastpos := line_lastpos - 1; indent := natural( integer( character'pos( script( line_firstpos ) ) - 1 ) ); line_firstpos := line_firstpos+1; -- find token in command line if firstpos > line_firstpos then -- token on line? token_firstpos := firstpos-line_firstpos+1; -- position in token_lastpos := lastpos-line_firstpos+1; -- returned string cmdline := null_unbounded_string; -- begin decompression for i in line_firstpos..line_lastpos loop -- for bytes in script if script( i ) > ASCII.DEL then -- a byte code? expand if script( i ) = high_ascii_escape then -- escaping if not is_escaping then is_escaping := true; else -- escaping itself? cmdline := cmdline & script( i ); -- add it is_escaping := false; -- and no longer escape end if; else if not is_escaping then -- not escaping? cmdline := cmdline & identifiers( character'pos( script(i) )-128 ).name; len := length( identifiers( character'pos( script(i) ) - 128 ).name ); if firstpos = lastpos and firstpos = i then -- tokenized keyword? token_lastpos := token_lastpos + len-1; -- adjust end position elsif lastpos > i then -- token shifted? token_lastpos := token_lastpos + len-1; -- adjust if firstpos > i then token_firstpos := token_firstpos + len-1; end if; end if; else cmdline := cmdline & script( i ); is_escaping := false; end if; end if; else -- not a code? cmdline := cmdline & script( i ); -- just add end if; end loop; -- for all codes token_firstpos := token_firstpos + indent; -- adj token pos token_lastpos := token_lastpos + indent; -- for ident size else -- not processed yet? token_firstpos := 1; -- position at token_lastpos := 1; -- first character cmdline := null_unbounded_string; -- same, without for i in line_firstpos..line_lastpos loop -- token stuff... if script( i ) = ASCII.HT then -- embedded tab? while (length( cmdline )) mod 8 /= 0 loop -- move to a column cmdline := cmdline & " "; -- of 8 end loop; elsif script( i ) > ASCII.DEL then -- keyword token? if script( i ) = high_ascii_escape then -- escaping if not is_escaping then is_escaping := true; else -- escaping itself? cmdline := cmdline & script( i ); -- add it is_escaping := false; -- and not escape end if; else if not is_escaping then -- not escaping? cmdline := cmdline & identifiers( character'pos( script(i) )-128 ).name; len := length( identifiers( character'pos( script(i) ) - 128 ).name ); if firstpos = lastpos and firstpos = i then -- token keyword? token_lastpos := token_lastpos + len-1; -- adj end posn elsif lastpos > i then -- token shifted? token_lastpos := token_lastpos + len-1; -- adjust if firstpos > i then token_firstpos := token_firstpos + len-1; end if; end if; else cmdline := cmdline & script( i ); is_escaping := false; end if; end if; else -- other character? cmdline := cmdline & script( i ); end if; end loop; end if; insert( cmdline, 1, to_string( indent * " " ) ); -- expand indentation if token_firstpos > length( cmdline ) then -- past end of cmd? token_firstpos := line_lastpos+1-line_firstpos; -- treat token as token_lastpos := token_firstpos; -- one char past end end if; end getCommandLine; function getCommandLine return unbounded_string is -- Return current command line, fully indented, but not including -- the LF separating lines. This function version doesn't compute -- the token position on the expanded line. firstpos, lastpos : natural; cmdline : unbounded_string; line_number : natural; begin getCommandLine( cmdline, firstpos, lastpos, line_number ); return cmdline; end getCommandLine; ----------------------------------------------------------------------------- -- GET NEXT TOKEN -- -- The main scanner procedure. Interpret the input text and return a token -- representing the next item on the line. Declare new, unknown identifiers -- in the identifier symbol table automatically (it's up to the caller to -- remove them). If the last token was an end-of-file token, continue -- to return end-of-file tokens forever. ----------------------------------------------------------------------------- gnt_commandLine : unbounded_string; procedure getNextToken is id : identifier; word : unbounded_string; ch : character; -- ch is a character buffer to reduce array accesses. Really, -- these should be optimized away by the compiler, but you'd -- be surprised by what a compiler won't optimize away... is_based_number : boolean; -- true if numeric literal has a base token_firstpos, token_lastpos, lineno : natural; begin -- Out of data? Never any more data. if token = eof_t then return; end if; -- End of line? Read next line. Display it if tracing is on. <> ch := script( cmdpos ); while ch = ASCII.NUL loop if trace then if syntax_check or (not exit_block and not error_found) then cmdpos := cmdpos + 2; -- first character of next command put( standard_error, "=> " & '"' ); getCommandLine( gnt_commandLine, token_firstpos, token_lastpos, lineno ); put( standard_error, toEscaped( gnt_commandLine ) ); put( standard_error, """ [" ); put( standard_error, lineno'img ); put_line( standard_error, "]" ); cmdpos := cmdpos - 2; end if; end if; cmdpos := cmdpos+nextScriptCommandOffset; -- line header and indent marker ch := script( cmdpos ); end loop; -- skip any white space before token if ch = ' ' or ch = ASCII.HT then cmdpos := cmdpos + 1; while script( cmdpos ) = ' ' or script( cmdpos ) = ASCII.HT loop cmdpos := cmdpos + 1; end loop; ch := script( cmdpos ); end if; -- token begins here firstpos := cmdpos; -- first char lastpos := cmdpos; -- last default -- Immediate Words if ch = eof_character then token := eof_t; return; elsif ch = immediate_word_delimiter then -- Immediate word string literals (word_t) -- -- The preprocessor will have placed delimiters around the word so we -- shouldn't have to check for a missing one. cmdpos := cmdpos+1; -- continue lastpos := cmdpos; -- reading while script( lastpos ) /= immediate_word_delimiter loop -- until last lastpos := lastpos+1; end loop; lastpos := lastpos-1; identifiers( word_t ).value := To_Unbounded_String( -- extract string script( cmdpos..lastpos ) ); cmdpos := lastpos+2; -- skip last " token := word_t; -- word literal return; elsif ch = immediate_sql_word_delimiter then -- Immediate SQL word string literals (sql_word_t) -- -- This is the same as a word_t but a different token value is used so -- BUSH will not try to expand the pattern (we don't want "select *" -- to be replaced with select and a list of file names). -- -- The preprocessor will have placed delimiters around the word so we -- shouldn't have to check for a missing one. cmdpos := cmdpos+1; -- continue lastpos := cmdpos; -- reading while script( lastpos ) /= immediate_sql_word_delimiter loop -- until last lastpos := lastpos+1; end loop; lastpos := lastpos-1; identifiers( sql_word_t ).value := To_Unbounded_String( -- extract string script( cmdpos..lastpos ) ); cmdpos := lastpos+2; -- skip last " token := sql_word_t; -- word literal return; -- elsif is_letter( ch ) or ch = high_ascii_escape then -- identifier? elsif (ch >= 'a' and ch <='z') or (ch >= 'A' and ch <='Z') or ch = high_ascii_escape then -- identifier? -- Identifiers or uncompressed keywords -- -- (Note: Control characters and leading underscores filtered out -- previously in tokenize stage). Previously unseen identifiers -- will be declared as type "new". if ch = high_ascii_escape then -- high ascii? word := null_unbounded_string; -- skip it else -- otherwise word := null_unbounded_string & ch; -- add to buffer end if; ch := script( lastpos ); -- continue while ch /= immediate_word_delimiter loop lastpos := lastpos+1; -- keep looking ch := script( lastpos ); -- next char exit when ch = immediate_word_delimiter; -- end found? bail if ch /= high_ascii_escape then -- skip high flag word := word & ch; -- otherwise end if; -- add to buffer end loop; id := eof_t; -- assume not lastpos := lastpos - 1; -- before delim for i in reverse 1..identifiers_top-1 loop -- search symbol if identifiers( i ).name = word then -- table id := i; exit; end if; end loop; if id = eof_t then -- not found? declareIdent( token, word, new_t ); -- declare it else -- otherwise if identifiers( id ).deleted then -- was deleted? identifiers( id ).deleted := false; -- redeclare identifiers( id ).kind := new_t; -- with type new end if; -- either way token := id; -- return id end if; -- skip delim cmdpos := lastpos + 2; -- next from here return; -- Check for a compressed keyword (represented with high bit set) elsif ch > ASCII.DEL then -- > ASCII 127 token := identifier( character'pos( script( cmdpos ) ) - 128 ); cmdpos := cmdpos + 1; -- special VM tokens here -- -- load_nr: load value from numeric register. Treat it like a number -- literal because that's all that's currently loaded into VMNR. -- load_sr: load value from string register. Treat it like a string -- literal because that's all that's currently loaded into VMSR. --if token = load_nr_t then -- token := number_t; -- identifiers( token ).value := VMNR(aVMNRNumber(character'pos(script(cmdpos))-1)); -- cmdpos := cmdpos + 1; --elsif token = load_sr_t then -- token := strlit_t; -- identifiers( token ).value := VMSR(aVMSRNumber(character'pos(script(cmdpos))-1)); -- cmdpos := cmdpos + 1; --end if; return; elsif is_digit( ch ) then -- a digit? -- Numeric literals (number_t) is_based_number := false; lastpos := lastpos+1; -- continue ch := script( lastpos ); -- reading while is_digit( ch ) or ch = '_' or ch = '.' or ch = '#' or (ch >= 'a' and ch <= 'f') or (ch >= 'A' and ch <= 'F') loop -- until end if script( lastpos..lastpos+1 ) = ".." then -- ".." -- of number exit; end if; if ch = '#' then is_based_number := true; end if; lastpos := lastpos+1; ch := script( lastpos ); exit when ch = ASCII.NUL; end loop; lastpos := lastpos - 1; if script( lastpos ) = '.' then err( "no digits after decimal" ); return; end if; if is_based_number then begin identifiers( number_t ).value := to_unbounded_string( natural'image( natural'value( ' ' & script( cmdpos..lastpos ) ) ) ); exception when others => err( "invalid based numeric literal" ); end; else identifiers( number_t ).value := To_Unbounded_String( -- extract number ' ' & script( cmdpos..lastpos ) ); end if; cmdpos := lastpos+1; -- advance posn token := number_t; -- numeric lit. return; elsif ch = ''' then -- single quote? -- Character literal / single quote string literals (charlit_t) -- -- (Note: Missing trailing quote handled in tokenizing stage.) cmdpos := cmdpos+1; -- continue lastpos := cmdpos; -- reading -- SPECIAL CASE: ''' (single quoted single quote) if script( lastpos ) = ''' then -- ''? if script( lastpos+1 ) = ''' then -- '''? lastpos := lastpos+2; -- skip literal cmdpos := lastpos; -- start here next identifiers( charlit_t ).value := To_Unbounded_String( "'" ); -- extract string token := charlit_t; -- char literal return; -- that's it end if; -- fall through end if; -- NORMAL CASE if script( lastpos ) = high_ascii_escape then lastpos := lastpos + 1; end if; identifiers( charlit_t ).value := To_Unbounded_String( -- extract string "" & script( lastpos ) ); lastpos := lastpos+1; cmdpos := lastpos+1; -- skip last ' token := charlit_t; -- char literal return; elsif ch = '"' then -- double quote? -- Double quote string literals (strlit_t) -- -- (Note: Missing trailing quote handled in tokenizing stage.) cmdpos := cmdpos+1; -- continue lastpos := cmdpos; -- reading -- SPECIAL CASE: """" (double quoted double quote) if script( lastpos ) = '"' then -- ""? if script( lastpos+1 ) = '"' then -- """? if script( lastpos+2 ) = '"' then -- """"? lastpos := lastpos+3; -- skip literal cmdpos := lastpos; -- start here next identifiers( strlit_t ).value := To_Unbounded_String( """" ); -- extract string token := strlit_t; -- string literal return; -- that's it end if; end if; -- fall through end if; -- NORMAL CASE identifiers( strlit_t ).value := Null_Unbounded_String; while script( lastpos ) /= '"' loop -- until last " if script( lastpos ) = high_ascii_escape then lastpos := lastpos + 1; end if; identifiers( strlit_t ).value := identifiers( strlit_t ).value & script( lastpos ); lastpos := lastpos + 1; end loop; -- lastpos := lastpos+1; cmdpos := lastpos+1; -- skip last " token := strlit_t; -- string literal return; elsif ch = '`' then -- back quote? -- Backquoted strings (backlit_t) -- -- (Note: Missing trailing quote handled in tokenizing stage.) cmdpos := cmdpos+1; -- continue lastpos := cmdpos; -- reading identifiers( backlit_t ).value := Null_Unbounded_String; while script( lastpos ) /= '`' loop -- until last ` if script( lastpos ) = high_ascii_escape then lastpos := lastpos + 1; end if; identifiers( backlit_t ).value := identifiers( backlit_t ).value & script( lastpos ); lastpos := lastpos + 1; end loop; -- lastpos := lastpos+1; cmdpos := lastpos+1; -- skip last ` token := backlit_t; -- string literal return; else -- Anything else is a symbol token (symbol_t) case ch is when '$' => -- $$ $# $? cmdpos := cmdpos + 1; -- $0..$9 if script( cmdpos ) = '$' then cmdpos := cmdpos + 1; elsif script( cmdpos ) = '#' then cmdpos := cmdpos + 1; elsif script( cmdpos ) = '?' then cmdpos := cmdpos + 1; elsif script( cmdpos ) >= '0' and script( cmdpos ) <= '9' then cmdpos := cmdpos + 1; end if; when '#' => -- # comments cmdpos := cmdpos + 1; while script( cmdpos ) /= ASCII.NUL loop cmdpos := cmdpos + 1; end loop; goto redo; when '-' => -- - / -- comment cmdpos := cmdpos + 1; if script( cmdpos ) = '-' then while script( cmdpos ) /= ASCII.NUL loop cmdpos := cmdpos + 1; end loop; goto redo; end if; when '=' => -- = / => cmdpos := cmdpos + 1; if script( cmdpos ) = '>' then cmdpos := cmdpos + 1; end if; when ':' => -- : / := cmdpos := cmdpos + 1; if script( cmdpos ) = '=' then cmdpos := cmdpos + 1; end if; when '*' => -- * / ** cmdpos := cmdpos + 1; if script( cmdpos ) = '*' then cmdpos := cmdpos + 1; end if; when '>' => -- > / >= cmdpos := cmdpos + 1; if script( cmdpos ) = '=' then cmdpos := cmdpos + 1; end if; when '<' => -- < / <= cmdpos := cmdpos + 1; if script( cmdpos ) = '=' then cmdpos := cmdpos + 1; end if; when '/' => -- '/' / /= cmdpos := cmdpos + 1; if script( cmdpos ) = '=' then cmdpos := cmdpos + 1; end if; when '.' => -- . / .. cmdpos := cmdpos + 1; if script( cmdpos ) = '.' then cmdpos := cmdpos + 1; end if; when '_' => -- _ test err( "Leading underscores not allowed in identifiers" ); return; when '!' => -- ! / != test if script( cmdpos+1 ) = '=' then err( "/= expected" ); end if; return; when others => -- return other cmdpos := cmdpos + 1; -- as a symbol end case; lastpos := cmdpos-1; -- end of token identifiers( symbol_t ).value := To_Unbounded_String( script( firstpos..lastpos ) ); token := symbol_t; -- a symbol return; end if; end getNextToken; procedure expect( expected_token : identifier ) is -- Check for the specified identifier. If the current token matches, -- get the next token, otherwise show an error. begin if token /= expected_token then if expected_token = keyword_t then err( "keyword expected" ); elsif expected_token = number_t then err( "number expected" ); elsif expected_token = strlit_t then err( "string literal expected" ); elsif expected_token = symbol_t then err( "symbol expected" ); else err( to_string( identifiers( expected_token ).name ) & " expected" ); end if; end if; getNextToken; end expect; procedure expect( expected_token : identifier; value : string ) is -- Check for the specified identifier and value. If the current token -- and its value matches, get the next token, otherwise show an error. begin if value /= to_string( identifiers( token ).value ) then err( "'" & value & "' expected" ); getNextToken; else getNextToken; end if; end expect; procedure expectSemicolon is begin if token = symbol_t and identifiers( token ).value = ":" then err( "':' should be ';'" ); else expect( symbol_t, ";" ); end if; end expectSemicolon; procedure skipWhiteSpace is -- Move scanner position to the first non-white space character -- (that is, spaces or tabs in the tokenized script). begin firstPos := cmdpos; while script( firstPos ) /= ASCII.NUL loop exit when script( firstPos ) /= ' ' or script( firstpos ) = ASCII.HT; firstPos := firstPos + 1; end loop; cmdPos := firstPos; end skipWhiteSpace; ------------------------------------------------------ -- Saving/Restoring Position -- ------------------------------------------------------ procedure markScanner( scannerState : out aScannerState ) is -- Record the current state of the scanner, including the token -- and the position in the current line. begin scannerState.token := token; scannerState.first := firstpos; scannerState.cmdpos := cmdpos; scannerState.last := lastpos; if token = symbol_t or token = strlit_t or token = charlit_t or token = number_t or token = word_t then scannerState.value := identifiers( token ).value; end if; end markScanner; procedure resumeScanning( scannerState : aScannerState ) is -- Restore the scanner to a previously recorded position, to continue -- execution at that place. begin token := scannerState.token; firstpos := scannerState.first; cmdpos := scannerState.cmdpos; lastpos := scannerState.last; if token = symbol_t or token = strlit_t or token = charlit_t or token = number_t or token = word_t then identifiers( token ).value := scannerState.value; end if; end resumeScanning; procedure saveScript( scriptState : out aScriptState ) is -- Save scanner state plus the current script so that a new -- script can be executed. The error flag, syntax check flag, -- etc. are not saved. begin if script = null then err( "Internal error: saveScript has no script to save" ); end if; markScanner( scriptState.scannerState ); scriptState.script := script; scriptState.size := identifiers( source_info_script_size_t ).value; scriptState.inputMode := inputMode; script := null; end saveScript; procedure restoreScript( scriptState : in out aScriptState ) is -- Restore a previously saved script, destroying the current one -- (if any). Execution will continue where it previously left -- off. begin if scriptState.script = null then err( "Internal error: restoreScript has no script to restore" ); end if; if script /= null then free( script ); end if; script := scriptState.script; scriptState.script := null; inputMode := scriptState.inputMode; identifiers( source_info_script_size_t ).value := scriptState.size; resumeScanning( scriptState.scannerState ); end restoreScript; ------------------------------------------------------ -- "BYTE CODE" GENERATION -- -- BUSH only runs compressed scripts. The compression -- process checks for certain syntax errors and makes -- the following changes: -- -- * EOL characters are replaced by ASCII nul's. -- * leading indentation is a single byte at the start -- of a line, allowing BUSH to ignore indentation -- unless the line is being printed to the screen. -- The actual value is +1 (so that 1 is no -- indentation, 2 is one space, ...) so that the -- only zero bytes are the EOL characters. -- * keywords are tokenized as a single byte with the -- position in the symbol table with the high bit -- set, avoiding a slow symbol search on keywords -- * EOF tokens are added as the beginning and ending -- "lines" of the script to act as sentinels. -- -- There could be other features in the future. -- -- For example: -- if x > y then -- becomes -- [ASCII 3][if code] x > y [then code][ASCII 0] -- -- reducing 17 bytes to 11 bytes, about 2/3rds the -- number of characters to read through when running -- a script. ------------------------------------------------------ -- Assuming everything is AdaScript works in most -- cases except for commands like "cd bush-0.9.1" -- which report an error since the byte code compiler -- assumes 0.9.1 is a malformed numeric literal. In -- order to deal with this kind of error, the compiler -- needs to have some minimal context info to determine -- if it's looking Bourne shell parameters or not. To -- avoid writing a recursive version of line2ByteCode, -- we'll use an enumerated variable to represent the -- the parsing history that we need. The history must -- be carried between lines. -- -- startOfStatement - at start of script or after last command -- startOfParameters - a command and we're looking for parameters -- shellParameters - no '(' so it's Bourne shell parameters -- normalParameters - an AdaScript statement or parameters -- -- Rules: -- if 'is', 'then', 'loop', ';' -> startOfStatement -- if startOfStatement and keyword -> normal -- if startOfStatement and identifier -> startOfParameters -- if startOfParameters and '(' -> normal else shellParameters -- -- Note: adding user-defined procedure and functions -- will break this logic since ';' now have a second context -- beyond a statement terminator. ------------------------------------------------------ type compressionContext is ( startOfStatement, startOfParameters, startOfDeleteParameters, isPart, adaScriptStatement, shellStatement, SQLStatement ); ------------------------------------------------------ -- General Purpose Register Assignment -- type aVMNRMapping is array( aVMNRNumber ) of unbounded_string; type aVMSRMapping is array( aVMSRNumber ) of unbounded_string; type aVMIRMapping is array( aVMIRNumber ) of identifier; -- the association of which variable with a register type compressionInfo is record compressedScript : unbounded_string; VMNRmap : aVMNRMapping; nextVMNR : aVMNRNumber := 0; VMSRmap : aVMSRMapping; nextVMSR : aVMSRNumber := 0; VMIRmap : aVMIRMapping; nextVMIR : aVMIRNumber := 0; context : compressionContext := startOfStatement; end record; ----------------------------------------------------------------------------- -- SET BYTE CODE LINE -- -- Set the current byte code line number. ----------------------------------------------------------------------------- procedure setByteCodeLine( lineno : natural ) is -- pragma suppress( RANGE_CHECK ); -- GCC 3.3.3 might falsely say overflow begin SourceLineNoLo := lineno mod 255; SourceLineNoHi := lineno / 255; end setByteCodeLine; ----------------------------------------------------------------------------- -- NEXT BYTE CODE LINE -- -- Advance the byte code line counter. -- Remember: zero is reserved for end of line so count ends at 254 + 1 ----------------------------------------------------------------------------- procedure nextByteCodeLine is pragma suppress( RANGE_CHECK ); -- GCC 3.3.3 falsely says overflow begin SourceLineNoLo := SourceLineNoLo + 1; if SourceLineNoLo > 254 then SourceLineNoLo := 0; SourceLineNoHi := SourceLineNoHi + 1; end if; end nextByteCodeLine; ----------------------------------------------------------------------------- -- GET BYTE CODE LINE NO -- -- Return the current value of the byte code line counter ----------------------------------------------------------------------------- function getByteCodeLineNo return natural is begin return (SourceLineNoLo) + (SourceLineNoHi) * 255; end getByteCodeLineNo; ----------------------------------------------------------------------------- -- ERR TOKENIZE -- -- If this is the first error encountered, display the message set the token -- to eof_t to abort the parsing and set the error_found flag to indicate -- that an error was encountered. (This is for use when generating internal -- byte code.) ----------------------------------------------------------------------------- procedure err_tokenize( msg:string; cmdline:string ) is lineStr : unbounded_string; begin if error_found then -- not first err? return; -- don't display end if; if inputMode /= interactive and inputMode /= breakout then -- a script? if gccOpt then -- gcc style? lineStr := to_unbounded_string( natural'image( getByteCodeLineNo ) ); -- remove leading if length( lineStr ) > 0 then -- space (if any) if element( lineStr, 1 ) = ' ' then delete( lineStr, 1, 1 ); end if; end if; put( standard_error, scriptFilePath ); -- show it put( standard_error, ":" ); put( standard_error, to_string( lineStr ) ); put( standard_error, ":1:" ); else put( standard_error, scriptFilePath ); -- otherwise put( standard_error, ":" ); -- leave leading put( standard_error, getByteCodeLineNo'img ); -- spaces in put( standard_error, ":1:" ); end if; else if gccOpt then -- gcc style? lineStr := to_unbounded_string( natural'image( getByteCodeLineNo ) ); -- remove leading if length( lineStr ) > 0 then -- space (if any) if element( lineStr, 1 ) = ' ' then delete( lineStr, 1, 1 ); end if; end if; put( standard_error, to_string( lineStr ) ); put( standard_error, ":1:" ); else put( standard_error, "In line" ); -- show line num put_line( standard_error, natural'image( getByteCodeLineNo ) ); end if; end if; -- Command line that errored (ie the current line) if not gccOpt then put_line( standard_error, cmdline ); -- display line -- Error Pointer for i in 1..firstpos-1 loop -- move to token put( standard_error, " " ); end loop; put( standard_error, "^" ); -- underline it if lastpos-1 > firstpos then for i in 1..lastpos-firstpos-2 loop put( standard_error, "-" ); end loop; put( standard_error, "^" ); end if; else put( standard_error, "[" & getDateString( ada.calendar.clock ) & "]" ); -- error time end if; -- Error Message put( standard_error, " " ); -- display the put_line( standard_error, msg ); -- error msg error_found := true; -- flag error token := eof_t; -- stop parser end err_tokenize; -- A record containing the compressed script being generated -- and the parse history (to recognize and deal wtih Bourne -- shell parameters). procedure resetRegisters( ci : in out CompressionInfo ) is -- called when we don't know if the index registers are valid -- anymore (ie. at a 'begin' or 'end;') begin ci.nextVMIR := 0; end resetRegisters; procedure freeVMNR( ci : in out compressionInfo; r : out aVMNRNumber ) is -- return a free General Purpose Numeric Register. If none, -- returns noRegister begin r := ci.nextVMNR; if ci.nextVMNR < aVMNRNumber'last then ci.nextVMNR := ci.nextVMNR + 1; end if; end freeVMNR; procedure freeVMSR( ci : in out compressionInfo; r : out aVMSRNumber ) is -- return a free General Purpose String Register. If none, -- returns noRegister begin r := ci.nextVMSR; if ci.nextVMSR < aVMSRNumber'last then ci.nextVMSR := ci.nextVMSR + 1; end if; end freeVMSR; function lookupVMNR( ci : compressionInfo; s : unbounded_string ) return aVMNRNumber is -- find a general purpose string register holding the value of id found : aVMNRNumber := aVMNRNumber( noRegister ); begin for r in aVMNRNumber'first..ci.nextVMNR-1 loop if ci.VMNRmap( r ) = s then found := r; end if; end loop; return found; end lookupVMNR; function lookupVMSR( ci : compressionInfo; s : unbounded_string ) return aVMSRNumber is -- find a general purpose string register holding the value of id found : aVMSRNumber := aVMSRNumber( noRegister ); begin for r in aVMSRNumber'first..ci.nextVMSR-1 loop if ci.VMSRmap( r ) = s then found := r; end if; end loop; return found; end lookupVMSR; function lookupVMIR( ci : compressionInfo; id : identifier ) return aVMIRNumber is -- find a general purpose numeric register holding the value of id found : aVMIRNumber := aVMIRNumber( noRegister ); begin for r in aVMIRNumber'first..ci.nextVMIR-1 loop if ci.VMIRmap( r ) = id then found := r; end if; end loop; return found; end lookupVMIR; ----------------------------------------------------------------------------- -- BYTE CODE PRECOMPILER ----------------------------------------------------------------------------- ----------------------------------------------------------------------------- -- SKIP WHITE SPACE -- -- Skip through spaces or horizontal tabs. Put uncompressed white space into -- compressed script ----------------------------------------------------------------------------- procedure skipWhiteSpace( ci : in out compressionInfo; command : unbounded_string ) is begin -- Skip white Space Before Token -- -- White space is spaces or horizontal tabs. If the end of line is reached, -- ignore white space and go to the end of line handler. Otherwise, attach -- uncompressed white space to compressed script. if Element( command, cmdpos ) = ' ' or Element( command, cmdpos ) = ASCII.HT then while Element( command, cmdpos ) = ' ' or Element( command, cmdpos ) = ASCII.HT loop cmdpos := cmdpos + 1; if cmdpos > length( command ) then -- ignore if at end of line return; end if; end loop; lastpos := cmdpos-1; ci.compressedScript := ci.compressedScript & slice( command, firstpos, lastpos ); firstpos := cmdpos; lastpos := cmdpos; end if; end skipWhiteSpace; ----------------------------------------------------------------------------- -- IS BYTE CODE -- -- Part of line2ByteCode (for isPart) -- -- "is" may be followed by a "record". Either one causes a start of statement -- but "is" must not start a new statement if "record" follows it. ----------------------------------------------------------------------------- procedure ISByteCode( ci : in out compressionInfo; command : unbounded_string ) is word : unbounded_string; id : identifier; begin firstpos := cmdpos; lastpos := cmdpos; -- Skip white Space Before Token -- -- White space is spaces or horizontal tabs. If the end of line is reached, -- ignore white space and go to the end of line handler. Otherwise, attach -- uncompressed white space to compressed script. if Element( command, cmdpos ) = ' ' or Element( command, cmdpos ) = ASCII.HT then while Element( command, cmdpos ) = ' ' or Element( command, cmdpos ) = ASCII.HT loop cmdpos := cmdpos + 1; if cmdpos > length( command ) then -- ignore if at end of line return; end if; end loop; lastpos := cmdpos-1; ci.compressedScript := ci.compressedScript & slice( command, firstpos, lastpos ); end if; -- Check for an Ada-style comment -- -- If it's an Ada comment, store it in the byte code and go to next handler -- to finish the line and return from this procedure. if Element( command, cmdpos ) = '-' and cmdpos < length( command ) then if Element( command, cmdpos+1 ) = '-' then ci.compressedScript := ci.compressedScript & slice( command, firstpos, length( command ) ); cmdpos := length( command ) + 1; return; end if; end if; -- Leading underscores if Element( command, cmdpos ) = '_' or -- identifiers with underscores not allowed, but we can't check for -- that here since we don't know yet if the identifier is part of a -- shell command or an AdaScript command (underscores in a shell -- command's parameters are OK). -- KB: Note this issues an AdaScript identifier, not a shell word, since -- a shell word is not possible right after an "is". -- Identifiers (Element( command, lastpos ) >= 'a' and Element( command, lastpos ) <='z') or (Element( command, lastpos ) >= 'A' and Element( command, lastpos ) <='Z') or Element( command, lastpos ) > ASCII.DEL then lastpos := cmdpos+1; word := null_unbounded_string; if Element( command, cmdpos ) > ASCII.DEL then word := word & toHighASCII( char_escape_t ); end if; word := word & Element( command, cmdpos ); if lastpos <= length( command ) then while is_alphanumeric( Element( command, lastpos ) ) or Element( command, lastpos ) = '_' or Element( command, lastpos ) = '.' or Element( command, lastpos ) > ASCII.DEL loop if Element( command, lastpos ) > ASCII.DEL then if Element( command, lastpos ) = ASCII.NUL or Element( command, lastpos ) = immediate_word_delimiter then err_tokenize( "ASCII character not allowed", to_string( command ) ); end if; if Element( command, lastpos ) > ASCII.DEL then word := word & toHighASCII( char_escape_t ); end if; elsif Element( command, lastpos ) = '.' and then lastpos < length( command ) then if Element( command, lastpos+1 ) = '.' then -- ".." exit; end if; end if; word := word & Element( command, lastpos ); lastpos := lastpos+1; exit when lastpos > length( command ); end loop; end if; id := eof_t; lastpos := lastpos - 1; -- word := To_Unbounded_string( slice( command, cmdpos, lastpos ) ); cmdpos := lastpos+1; for i in reverse 1..reserved_top-1 loop -- was identifiers top if identifiers( i ).name = word and not identifiers( i ).deleted then id := i; exit; end if; end loop; if ci.context = startOfStatement and id >= keywords_top then ci.context := startOfParameters; elsif ci.context = startOfStatement then ci.context := shellStatement; elsif id = record_t then ci.context := adaScriptStatement; else ci.context := adaScriptStatement; end if; -- add compressed token to compressed script declare pragma suppress( RANGE_CHECK ); -- GCC 3.3.3 (Red Hat Fedora Core 2) falsely reports a out-of-range -- exception. We'll do the range checking manually as a work -- around... begin if id /= eof_t and id < reserved_top then ci.compressedScript := ci.compressedScript & toHighASCII( id ); else ci.compressedScript := ci.compressedScript & word & immediate_word_delimiter; end if; -- ci.compressedScript := ci.compressedScript & -- slice( command, firstpos, lastpos ) & immediate_word_delimiter; exception when others => err_tokenize( "interal error: byte code generator: exception thrown", to_string( command) ); raise; end; -- anything else is a start of statement else ci.context := startOfStatement; end if; end ISByteCode; ----------------------------------------------------------------------------- -- ADA SCRIPT STATEMENT BYTE CODE -- -- Part of line2ByteCode -- -- We are continuing a new AdaScript statement. Convert it to byte code. ----------------------------------------------------------------------------- procedure adaScriptStatementByteCode( ci : in out compressionInfo; command : unbounded_string ) is id : identifier; word : unbounded_string; decimalCount : natural; octathorneCount : natural; uncompressed_script : unbounded_string; newstr : unbounded_string; -- nr : aVMNRNumber; -- sr : aVMSRNumber; -- ir : aVMIRNumber; tabAdjust : natural := 0; begin -- Tokenize keywords in the line. This is very similar to getNextToken -- except tokens are stored in the compressed script instead of in a -- variables. Keywords are stored as their symbol table position with -- the high-bit set. <> if cmdpos > length( command ) then -- end of line? -- ci.compressedScript := ci.compressedScript & ASCII.NUL; -- add ASCII 0 return; -- and quit end if; firstpos := cmdpos; -- prepare to lastpos := cmdpos; -- get token skipWhiteSpace( ci, command ); if cmdpos > length( command ) then goto next; end if; -- read through leading white space if Element( command, cmdpos ) = ' ' or Element( command, cmdpos ) = ASCII.HT then while Element( command, cmdpos ) = ' ' or Element( command, cmdpos ) = ASCII.HT loop cmdpos := cmdpos + 1; if cmdpos > length( command ) then -- ignore if at end of line goto next; end if; end loop; lastpos := cmdpos-1; ci.compressedScript := ci.compressedScript & slice( command, firstpos, lastpos ); goto next; end if; -- Illegal characters: control characters or characters with the high -- bit set (since those are used to represent compressed keywords). if is_control( Element( command, cmdpos ) ) then err_tokenize( "Unexpected character ASCII" & character'pos( Element( command, cmdpos ) )'img, to_string( command ) ); cmdpos := cmdpos + 1; return; -- Leading underscores elsif Element( command, cmdpos ) = '_' or -- identifiers with underscores not allowed, but we can't check for -- that here since we don't know yet if the identifier is part of a -- shell command or an AdaScript command (underscores in a shell -- command's parameters are OK). -- Identifiers (Element( command, lastpos ) >= 'a' and Element( command, lastpos ) <='z') or (Element( command, lastpos ) >= 'A' and Element( command, lastpos ) <='Z') or Element( command, lastpos ) > ASCII.DEL then lastpos := cmdpos+1; word := null_unbounded_string; if Element( command, cmdpos ) > ASCII.DEL then word := word & toHighASCII( char_escape_t ); end if; word := word & Element( command, cmdpos ); if lastpos <= length( command ) then while is_alphanumeric( Element( command, lastpos ) ) or Element( command, lastpos ) = '_' or Element( command, lastpos ) = '.' or Element( command, lastpos ) > ASCII.DEL loop if Element( command, lastpos ) > ASCII.DEL then if Element( command, lastpos ) = ASCII.NUL or Element( command, lastpos ) = immediate_word_delimiter then err_tokenize( "ASCII character not allowed", to_string( command ) ); end if; if Element( command, lastpos ) > ASCII.DEL then word := word & toHighASCII( char_escape_t ); end if; elsif Element( command, lastpos ) = '.' and then lastpos < length( command ) then if Element( command, lastpos+1 ) = '.' then -- ".." exit; end if; end if; word := word & Element( command, lastpos ); lastpos := lastpos+1; exit when lastpos > length( command ); end loop; end if; id := eof_t; lastpos := lastpos - 1; -- word := To_Unbounded_string( slice( command, cmdpos, lastpos ) ); cmdpos := lastpos+1; for i in reverse 1..reserved_top-1 loop -- was identifiers top if identifiers( i ).name = word and not identifiers( i ).deleted then id := i; exit; end if; end loop; if ci.context = startOfStatement and id >= keywords_top then ci.context := startOfParameters; elsif ci.context = startOfStatement then ci.context := shellStatement; elsif id = is_t then ci.context := isPart; elsif id=then_t or id=loop_t then ci.context := startOfStatement; end if; -- add compressed token to compressed script declare pragma suppress( RANGE_CHECK ); -- GCC 3.3.3 (Red Hat Fedora Core 2) falsely reports a out-of-range -- exception. We'll do the range checking manually as a work -- around... begin if id /= eof_t and id < reserved_top then ci.compressedScript := ci.compressedScript & toHighASCII( id ); else ci.compressedScript := ci.compressedScript & word & immediate_word_delimiter; end if; -- ci.compressedScript := ci.compressedScript & -- slice( command, firstpos, lastpos ) & immediate_word_delimiter; goto next; exception when others => err_tokenize( "interal error: byte code generator: exception thrown", to_string( command) ); raise; end; elsif is_digit( Element( command, cmdpos ) ) then -- numeric literal lastpos := cmdpos; decimalCount := 0; octathorneCount := 0; while is_digit( Element( command, lastpos)) or Element( command, lastpos) = '_' or Element( command, lastpos) = '#' or Element( command, lastpos) = '.' loop if Element( command, lastpos ) = '.' and then lastpos < length( command ) then if Element( command, lastpos+1 ) = '.' then -- ".." exit; end if; decimalCount := decimalCount+1; if decimalCount > 1 then cmdpos := lastpos; -- move cmdpos or infinite loop! err_tokenize( "too many decimal points in floating point literal", to_string( command ) ); return; end if; end if; if Element( command, lastpos ) = '#' then octathorneCount := octathorneCount + 1; if octathorneCount > 1 then lastpos := lastpos+1; exit; end if; end if; lastpos := lastpos+1; exit when lastpos > length( command ); end loop; cmdpos := lastpos; lastpos := lastpos-1; -- if firstpos /= lastpos then -- don't bother compressing 1 char numbers -- newstr := To_Unbounded_String( Slice( command, firstpos, lastpos ) ); -- nr := lookupVMNR( ci, newStr ); -- if nr /= aVMNRNumber( noRegister ) then -- ci.compressedScript := ci.compressedScript & -- character'val( 128 + integer( load_nr_t ) ) & -- character'val( nr+1 ); -- put_line( "Found number at register NR " & sr'img ); -- cmdpos := lastpos+1; -- skip last " -- goto next; -- else -- freeVMNR( ci, nr ); -- if nr /= aVMNRNumber( noRegister ) then -- VMNR( nr ) := newstr; -- map is redundant if we're pre-loading all numeric literals -- ci.VMNRmap( nr ) := newstr; -- ci.compressedScript := ci.compressedScript & -- character'val( 128 + integer( load_nr_t ) ) & -- character'val( nr+1 ); -- put_line( "Number at new register NR " & nr'img ); -- cmdpos := lastpos+1; -- skip last " -- goto next; -- else -- put_line( "No free numeric registers" ); -- end if; -- end if; -- end if; elsif Element( command, cmdpos ) = ''' then -- a char literal? cmdpos := cmdpos+1; -- skip single quote lastpos := cmdpos; -- first literal ch word := null_unbounded_string & '''; -- starts with ' if lastpos <= length( command ) then -- not EOL quote? -- SPECIAL CASE: ''' (single quoted single quote) if Element( command, lastpos ) = ''' then -- ''? if lastpos < length( command ) then -- not EOL? if Element( command, lastpos+1 ) = ''' then -- '''? lastpos := lastpos+2; -- skip literal cmdpos := lastpos; -- start here next ci.compressedScript := ci.compressedScript & "'''"; goto next; -- add & continue end if; -- otherwise end if; -- fall through end if; -- NORMAL CASE while Element( command, lastpos ) /= ''' loop -- not literal end? if Element( command, lastpos ) > ASCII.DEL then -- hi ascii? word := word & toHighASCII( char_escape_t ); -- escape it end if; word := word & Element( command, lastpos ); -- add letter lastpos := lastpos+1; -- advance one ch exit when lastpos > length( command ); -- EOL? bail end loop; end if; if lastpos > length( command ) then -- missing quote? err_tokenize( "missing single quote", to_string( command ) ); return; else word := word & '''; -- add ' to buffer lastpos := lastpos+1; -- skip last ' cmdpos := lastpos; -- start here next -- cmdpos := lastpos+1; end if; if lastpos-firstpos < 3 then -- too long? err_tokenize( "character literal too short--strings are delimited by double quote characters", to_string( command ) ); end if; if lastpos-firstpos > 3 then -- too long? err_tokenize( "character literal too long--strings are delimited by double quote characters", to_string( command ) ); end if; ci.compressedScript := ci.compressedScript & word; -- add literal goto next; elsif Element( command, cmdpos ) = '"' then cmdpos := cmdpos+1; lastpos := cmdpos; if lastpos <= length( command ) then -- quote as last char on line while Element( command, lastpos ) /= '"' loop lastpos := lastpos+1; exit when lastpos > length( command ); end loop; end if; if lastpos > length( command ) then err_tokenize( "missing double quote", to_string( command ) ); return; else -- newstr := To_Unbounded_String( Slice( command, cmdpos, lastpos-1 ) ); -- sr := lookupVMSR( ci, newStr ); -- if sr /= aVMSRNumber( noRegister ) then -- ci.compressedScript := ci.compressedScript & -- character'val( 128 + integer( load_sr_t ) ) & -- character'val( sr+1 ); -- put_line( "Found string at register SR " & sr'img ); -- cmdpos := lastpos+1; -- skip last " -- goto next; -- else -- freeVMSR( ci, sr ); -- if sr /= aVMSRNumber( noRegister ) then -- VMSR( sr ) := newstr; -- map is redundant if we're pre-loading all string literals -- ci.VMSRmap( sr ) := newstr; -- ci.compressedScript := ci.compressedScript & -- character'val( 128 + integer( load_sr_t ) ) & -- character'val( sr+1 ); -- put_line( "String at new register SR " & sr'img ); -- cmdpos := lastpos+1; -- skip last " -- goto next; -- else -- put_line( "No free string registers" ); -- end if; -- end if; cmdpos := lastpos+1; -- skip last " end if; elsif Element( command, cmdpos ) = '`' then cmdpos := cmdpos+1; lastpos := cmdpos; if lastpos <= length( command ) then -- quote as last char on line while Element( command, lastpos ) /= '`' loop lastpos := lastpos+1; exit when lastpos > length( command ); end loop; end if; if natural( lastpos ) > length( command ) then err_tokenize( "missing back quote", to_string( command ) ); return; else cmdpos := lastpos+1; -- skip last ` end if; else -- other punctuation symbols firstpos := cmdpos; case Element( command, natural( cmdpos ) ) is when ';' => ci.context := startOfStatement; ci.compressedScript := ci.compressedScript & Element( command, natural( cmdpos ) ); cmdpos := cmdpos + 1; return; when '$' => if cmdpos < length( command ) then cmdpos := cmdpos + 1; end if; when '?' => -- special case: switch to shell context and don't check parameters -- ci.context := shellStatement; -- ci.compressedScript := ci.compressedScript & "?"; -- cmdpos := cmdpos + 1; -- return; null; when '#' => -- shell comments used to be supported but not any more err_tokenize( "shell comment not supported", to_string( command ) ); cmdpos := length( command ); -- for read of next line (or EOF) when '-' => if cmdpos < length( command ) then if Element( command, cmdpos+1 ) = '-' then cmdpos := length( command ); -- for read of next line (or EOF) end if; end if; when '=' => if cmdpos < length( command ) then if Element( command, cmdpos+1 ) = '>' then cmdpos := cmdpos + 1; end if; end if; when ':' => if cmdpos < length( command ) then if Element( command, cmdpos+1 ) = '=' then cmdpos := cmdpos + 1; end if; end if; when '*' => if cmdpos < length( command ) then if Element( command, cmdpos+1 ) = '*' then cmdpos := cmdpos + 1; end if; end if; when '>' => if cmdpos < length( command ) then if Element( command, cmdpos+1 ) = '=' then cmdpos := cmdpos + 1; end if; end if; when '<' => if cmdpos < length( command ) then if Element( command, cmdpos+1 ) = '=' then cmdpos := cmdpos + 1; end if; end if; when '\' => if cmdpos < length( command ) then cmdpos := cmdpos + 1; end if; when '/' => if cmdpos < length( command ) then if Element( command, cmdpos+1 ) = '=' then cmdpos := cmdpos + 1; end if; end if; when '.' => if cmdpos < length( command ) then if Element( command, cmdpos+1 ) = '.' then cmdpos := cmdpos + 1; end if; end if; when others => null; -- just that character end case; lastpos := cmdpos; cmdpos := cmdpos + 1; end if; ci.compressedScript := ci.compressedScript & slice( command, firstpos, lastpos ); goto next; end adaScriptStatementByteCode; ----------------------------------------------------------------------------- -- SHELL STATEMENT BYTE CODE -- -- Part of line2ByteCode -- -- Translate a POSIX shell command (with quoted shell words) to byte code -- (Compressed Tokens). Don't look for Ada or SQL formatting but honour Ada -- comments. ----------------------------------------------------------------------------- procedure shellStatementByteCode( ci : in out compressionInfo; command : unbounded_string ) is word : unbounded_string; uncompressed_script : unbounded_string; newstr : unbounded_string; -- nr : aVMNRNumber; -- sr : aVMSRNumber; -- ir : aVMIRNumber; tabAdjust : natural := 0; escapingNext : boolean := false; ch : character; inDoubleQuotes : boolean; inSingleQuotes : boolean; inBackQuotes : boolean; inBackslash : boolean; begin -- Tokenize shell words in the line. This is very similar to getNextToken -- except tokens are stored in the compressed script instead of in a -- variables. Keywords are stored as their symbol table position with -- the high-bit set. -- Check for End of Line <> if cmdpos > length( command ) then -- end of line? -- ci.compressedScript := ci.compressedScript & ASCII.NUL; -- add ASCII 0 return; -- and quit end if; -- First Pos and Last Pos will mark the token text. Cmdpos is our position -- in the line (need a better name!) firstpos := cmdpos; -- prepare to lastpos := cmdpos; -- get token skipWhiteSpace( ci, command ); if cmdpos > length( command ) then goto next; end if; -- special tokens: check up front ch := Element( command, cmdpos ); if ch = ';' then ci.context := startOfStatement; ci.compressedScript := ci.compressedScript & ch; cmdpos := cmdpos + 1; return; elsif ch = '|' then ci.context := startOfStatement; ci.compressedScript := ci.compressedScript & ch; cmdpos := cmdpos + 1; return; end if; -- Check for an Ada-style comment -- -- If it's an Ada comment, store it in the byte code and go to next handler -- to finish the line and return from this procedure. if Element( command, cmdpos ) = '-' and cmdpos < length( command ) then if Element( command, cmdpos+1 ) = '-' then ci.compressedScript := ci.compressedScript & slice( command, firstpos, length( command ) ); cmdpos := length( command ) + 1; goto next; end if; end if; -- We are now looking at a shell word -- Handle Shell Words -- -- After skipping white space, we are expecting shell words. A shell word -- may contain double quotes, single quotes, back quotes and backslashes. -- Beware of Ada comments at the end of line. -- We start will all quoting off inDoubleQuotes := false; inSingleQuotes := false; inBackQuotes := false; inBackslash := false; -- Check for special single-character shell words -- -- A semi-colon is the last word of the shell statement. Return control -- to line2ByteCode in case next command is something other than another -- shell statement. -- -- A vertical bar always means another shell statement... ch := Element( command, cmdpos ); -- next character if ch = ';' then -- really, an AdaScript statement but we're not ready... ci.context := startOfStatement; ci.compressedScript := ci.compressedScript & slice( command, firstpos, lastpos ); cmdpos := cmdpos + 1; return; elsif ch = '|' then ci.compressedScript := ci.compressedScript & slice( command, firstpos, lastpos ); cmdpos := cmdpos + 1; goto next; end if; -- Loop through word loop -- First, check for end of line. -- -- Really, shell words should be extendable to a new line with \ at the -- end of line, but I'm not ready to support that tonight. if cmdpos > length( command ) then if inSingleQuotes then err_tokenize( "missing single quote", to_string( command ) ); return; end if; if inDoubleQuotes then err_tokenize( "missing double quote", to_string( command ) ); return; end if; if inBackQuotes then err_tokenize( "missing back quote", to_string( command ) ); return; end if; if inBackslash then err_tokenize( "missing backslashed character", to_string( command ) ); return; end if; lastpos := cmdpos - 1; ci.context := startOfStatement; exit; end if; ch := Element( command, cmdpos ); -- next character -- Second, check for characters that will interfere with the compressed -- tokens. if is_control( Element( command, cmdpos ) ) or Element( command, cmdpos ) > '~' then err_tokenize( "Unexpected character ASCII" & character'pos( Element( command, cmdpos ) )'img, to_string( command ) ); return; end if; -- Third, handle quoting quoting characters if ch = '"' and not inSingleQuotes and not inBackslash then inDoubleQuotes := not inDoubleQuotes; elsif ch = ''' and not inDoubleQuotes and not inBackslash then inSingleQuotes := not inSingleQuotes; elsif ch = '`' and not inSingleQuotes and not inBackslash then inBackQuotes := not inBackQuotes; elsif ch = '\' and not inSingleQuotes and not inBackslash then inBackslash := true; else -- Fourth, look for word terminators if not (inSingleQuotes or inDoubleQuotes or inBackQuotes or inBackslash) then if ch = ' ' then lastpos := cmdpos - 1; exit; elsif ch = ASCII.HT then lastpos := cmdpos - 1; exit; elsif ch = ';' then lastpos := cmdpos - 1; exit; elsif ch = '|' then lastpos := cmdpos - 1; exit; end if; end if; -- escaped -- Got here? Character is good. Backslashing? Turn it off. inBackslash := false; end if; -- no quoting characters cmdpos := cmdpos + 1; end loop; ci.compressedScript := ci.compressedScript & toHighASCII( imm_delim_t ) & slice( command, firstpos, lastpos ) & toHighASCII( imm_delim_t ); goto next; end shellStatementByteCode; ----------------------------------------------------------------------------- -- SQL STATEMENT BYTE CODE -- -- Part of line2ByteCode -- -- We are beginning a new SQL statement. Convert it to byte code. This is -- similar the shell words but it's one sentence. ----------------------------------------------------------------------------- procedure SQLStatementByteCode( ci : in out compressionInfo; command : unbounded_string ) is word : unbounded_string; uncompressed_script : unbounded_string; newstr : unbounded_string; tabAdjust : natural := 0; escapingNext : boolean := false; ch : character; inDoubleQuotes : boolean; inSingleQuotes : boolean; inBackQuotes : boolean; inBackslash : boolean; begin -- Tokenize shell words in the line. This is very similar to getNextToken -- except tokens are stored in the compressed script instead of in a -- variables. Keywords are stored as their symbol table position with -- the high-bit set. -- Check for End of Line <> if cmdpos > length( command ) then -- end of line? -- ci.compressedScript := ci.compressedScript & ASCII.NUL; -- add ASCII 0 return; -- and quit end if; -- Trailing shell params on the end if ci.context /= SQLstatement then return; end if; -- First Pos and Last Pos will mark the token text. Cmdpos is our position -- in the line (need a better name!) firstpos := cmdpos; -- prepare to lastpos := cmdpos; -- get token skipWhiteSpace( ci, command ); if cmdpos > length( command ) then goto next; end if; -- Check for an Ada-style comment -- -- If it's an Ada comment, store it in the byte code and go to next handler -- to finish the line and return from this procedure. if Element( command, cmdpos ) = '-' and cmdpos < length( command ) then if Element( command, cmdpos+1 ) = '-' then ci.compressedScript := ci.compressedScript & slice( command, firstpos, length( command ) ); cmdpos := length( command ) + 1; goto next; end if; end if; -- We are now looking at a shell word -- Handle Shell Words -- -- After skipping white space, we are expecting shell words. A shell word -- may contain double quotes, single quotes, back quotes and backslashes. -- Beware of Ada comments at the end of line. -- We start will all quoting off inDoubleQuotes := false; inSingleQuotes := false; inBackQuotes := false; inBackslash := false; -- Check for special single-character shell words -- -- A semi-colon is the last word of the shell statement. Return control -- to line2ByteCode in case next command is something other than another -- shell statement. -- -- A vertical bar always means another shell statement... ch := Element( command, cmdpos ); -- next character if ch = ';' then -- really, an AdaScript statement but we're not ready... ci.context := startOfStatement; ci.compressedScript := ci.compressedScript & slice( command, firstpos, lastpos ); cmdpos := cmdpos + 1; return; --elsif ch = '|' then -- ci.compressedScript := ci.compressedScript & slice( command, firstpos, lastpos ); -- cmdpos := cmdpos + 1; -- goto next; end if; -- Loop through word loop -- First, check for end of line. if cmdpos > length( command ) then if inSingleQuotes then err_tokenize( "missing single quote", to_string( command ) ); return; end if; if inDoubleQuotes then err_tokenize( "missing double quote", to_string( command ) ); return; end if; if inBackQuotes then err_tokenize( "missing back quote", to_string( command ) ); return; end if; if inBackslash then err_tokenize( "missing backslashed character", to_string( command ) ); return; end if; lastpos := cmdpos - 1; ci.compressedScript := ci.compressedScript & slice( command, firstpos, lastpos ); goto next; end if; ch := Element( command, cmdpos ); -- next character -- Check for charactes that will interfere with the compressed tokens. if is_control( Element( command, cmdpos ) ) or Element( command, cmdpos ) > '~' then err_tokenize( "Unexpected character ASCII" & character'pos( Element( command, cmdpos ) )'img, to_string( command ) ); return; end if; -- Second, handle quoting quoting characters if ch = '"' and not inSingleQuotes and not inBackslash then inDoubleQuotes := not inDoubleQuotes; elsif ch = ''' and not inDoubleQuotes and not inBackslash then inSingleQuotes := not inSingleQuotes; elsif ch = '\' and not inSingleQuotes and not inBackslash then inBackslash := true; else -- Third, look for word terminators if not (inSingleQuotes or inDoubleQuotes or inBackQuotes or inBackslash) then --if ch = ' ' then -- ci.context := startOfStatement; -- lastpos := cmdpos - 1; -- exit; --elsif ch = ASCII.HT then --if ch = ASCII.HT then -- ci.context := startOfStatement; -- lastpos := cmdpos - 1; -- exit; if ch = ';' then ci.context := startOfStatement; lastpos := cmdpos - 1; exit; elsif ch = '|' then ci.context := shellStatement; lastpos := cmdpos - 1; exit; elsif ch = '>' then ci.context := shellStatement; lastpos := cmdpos - 1; exit; end if; --if ch = ';' then -- ci.context := startOfStatement; -- lastpos := cmdpos - 1; -- exit; end if; -- escaped -- Got here? Character is good. Backslashing? Turn it off. inBackslash := false; end if; -- no quoting characters cmdpos := cmdpos + 1; end loop; ci.compressedScript := ci.compressedScript & toHighASCII( imm_sql_delim_t ) & slice( command, firstpos, lastpos ) & toHighASCII( imm_sql_delim_t ); goto next; end SQLStatementByteCode; ----------------------------------------------------------------------------- -- START OF PARAMETERS BYTE CODE -- -- Part of line2ByteCode -- -- Check for ( (AdaScript parameters), := (AdaScript statement), ... -- Besides skipping whitespace does nothing. ----------------------------------------------------------------------------- procedure startOfParametersByteCode( ci : in out compressionInfo; command : unbounded_string ) is ch : character; begin firstpos := cmdpos; lastpos := cmdpos; -- Skip white Space Before Token -- -- White space is spaces or horizontal tabs. If the end of line is reached, -- ignore white space and go to the end of line handler. Otherwise, attach -- uncompressed white space to compressed script. if Element( command, cmdpos ) = ' ' or Element( command, cmdpos ) = ASCII.HT then while Element( command, cmdpos ) = ' ' or Element( command, cmdpos ) = ASCII.HT loop cmdpos := cmdpos + 1; if cmdpos > length( command ) then -- ignore if at end of line return; end if; end loop; lastpos := cmdpos-1; ci.compressedScript := ci.compressedScript & slice( command, firstpos, lastpos ); end if; -- Check for an Ada-style comment -- -- If it's an Ada comment, store it in the byte code and go to next handler -- to finish the line and return from this procedure. if Element( command, cmdpos ) = '-' and cmdpos < length( command ) then if Element( command, cmdpos+1 ) = '-' then ci.compressedScript := ci.compressedScript & slice( command, firstpos, length( command ) ); cmdpos := length( command ) + 1; return; end if; end if; ch := Element( command, cmdpos ); case ch is when '(' => ci.context := adaScriptStatement; when ':' => ci.context := adaScriptStatement; when ',' => ci.context := adaScriptStatement; when others => ci.context := shellStatement; end case; end startOfParametersByteCode; ----------------------------------------------------------------------------- -- START OF DELETE PARAMETERS BYTE CODE -- -- Part of line2ByteCode -- -- The delete command has two formats: SQL and AdaScript, not Shell and -- AdaScript... This is similar to parametersByteCode. ----------------------------------------------------------------------------- procedure startOfDeleteParametersByteCode( ci : in out compressionInfo; command : unbounded_string ) is ch : character; begin firstpos := cmdpos; lastpos := cmdpos; -- Skip white Space Before Token -- -- White space is spaces or horizontal tabs. If the end of line is reached, -- ignore white space and go to the end of line handler. Otherwise, attach -- uncompressed white space to compressed script. if Element( command, cmdpos ) = ' ' or Element( command, cmdpos ) = ASCII.HT then while Element( command, cmdpos ) = ' ' or Element( command, cmdpos ) = ASCII.HT loop cmdpos := cmdpos + 1; if cmdpos > length( command ) then -- ignore if at end of line return; end if; end loop; lastpos := cmdpos-1; ci.compressedScript := ci.compressedScript & slice( command, firstpos, lastpos ); end if; -- Check for an Ada-style comment -- -- If it's an Ada comment, store it in the byte code and go to next handler -- to finish the line and return from this procedure. if Element( command, cmdpos ) = '-' and cmdpos < length( command ) then if Element( command, cmdpos+1 ) = '-' then ci.compressedScript := ci.compressedScript & slice( command, firstpos, length( command ) ); cmdpos := length( command ) + 1; return; end if; end if; ch := Element( command, cmdpos ); case ch is when '(' => ci.context := adaScriptStatement; when ':' => ci.context := adaScriptStatement; when others => ci.context := SQLStatement; end case; end startOfDeleteParametersByteCode; ----------------------------------------------------------------------------- -- START OF STATEMENT BYTE CODE -- -- Part of line2ByteCode -- -- We are beginning a new statement of unknown type (ie. after a ;). Handle -- the first word and compress it. Determine what context is next and return -- it. -- -- We don't know what the first word is. It must be treated as an Ada -- variable (or shell word in double quotes) so it can be declared in the -- symbol table. -- -- Results can be: -- Start of Parameters ( word := | ( | shell-words ) -- SQL Statement ( select | ... ) -- Shell Statement ( env | ... ) -- Ada Statement ( ... ) ----------------------------------------------------------------------------- procedure startOfStatementByteCode( ci : in out compressionInfo; command : unbounded_string ) is word : unbounded_string; uncompressed_script : unbounded_string; newstr : unbounded_string; -- nr : aVMNRNumber; -- sr : aVMSRNumber; -- ir : aVMIRNumber; tabAdjust : natural := 0; escapingNext : boolean := false; ch : character; id : identifier; -- backupPos : natural; begin -- Tokenize shell words in the line. This is very similar to getNextToken -- except tokens are stored in the compressed script instead of in a -- variables. Keywords are stored as their symbol table position with -- the high-bit set. -- Check for End of Line if cmdpos > length( command ) then -- end of line? ci.compressedScript := ci.compressedScript & ASCII.NUL; -- add ASCII 0 return; -- and quit end if; -- First Pos and Last Pos will mark the token text. Cmdpos is our position -- in the line (need a better name!) firstpos := cmdpos; -- prepare to lastpos := cmdpos; -- get token -- Skip white Space Before Token -- -- White space is spaces or horizontal tabs. If the end of line is reached, -- ignore white space and go to the end of line handler. Otherwise, attach -- uncompressed white space to compressed script. if Element( command, cmdpos ) = ' ' or Element( command, cmdpos ) = ASCII.HT then while Element( command, cmdpos ) = ' ' or Element( command, cmdpos ) = ASCII.HT loop cmdpos := cmdpos + 1; if cmdpos > length( command ) then -- ignore if at end of line return; end if; end loop; lastpos := cmdpos-1; ci.compressedScript := ci.compressedScript & slice( command, firstpos, lastpos ); -- goto next; end if; -- Check for an Ada-style comment -- -- If it's an Ada comment, store it in the byte code and go to next handler -- to finish the line and return from this procedure. if Element( command, cmdpos ) = '-' and cmdpos < length( command ) then if Element( command, cmdpos+1 ) = '-' then ci.compressedScript := ci.compressedScript & slice( command, firstpos, length( command ) ); cmdpos := length( command ) + 1; return; end if; end if; -- backupPos := cmdpos; -- First, try an AdaScript word. If that fails, treat it as a new identifier -- and declare it. Don't use quoted shell words since parser assumes new -- ident will be declared in the symbol table. id := eof_t; ch := Element( command, cmdpos ); if ch = '?' then ci.context := adaScriptStatement; return; elsif ch = '@' then ci.context := adaScriptStatement; return; elsif ch = '"' then cmdpos := cmdpos+1; lastpos := cmdpos; if lastpos <= length( command ) then -- quote as last char on line while Element( command, lastpos ) /= '"' loop lastpos := lastpos+1; exit when lastpos > length( command ); end loop; end if; if lastpos > length( command ) then err_tokenize( "missing double quote", to_string( command ) ); return; else cmdpos := lastpos+1; -- skip last " end if; ci.compressedScript := ci.compressedScript & slice( command, firstpos, lastpos ); ci.context := startOfParameters; return; elsif (ch >= 'a' and ch <='z') or (ch >= 'A' and ch <='Z') or ch > ASCII.DEL then lastpos := cmdpos+1; word := null_unbounded_string; if Element( command, cmdpos ) > ASCII.DEL then word := word & toHighASCII( char_escape_t ); end if; word := word & Element( command, cmdpos ); if lastpos <= length( command ) then while is_alphanumeric( Element( command, lastpos ) ) or Element( command, lastpos ) = '_' or Element( command, lastpos ) = '.' or Element( command, lastpos ) > ASCII.DEL loop if Element( command, lastpos ) > ASCII.DEL then if Element( command, lastpos ) = ASCII.NUL or Element( command, lastpos ) = immediate_word_delimiter then err_tokenize( "ASCII character not allowed", to_string( command ) ); end if; if Element( command, lastpos ) > ASCII.DEL then word := word & toHighASCII( char_escape_t ); end if; elsif Element( command, lastpos ) = '.' and then lastpos < length( command ) then if Element( command, lastpos+1 ) = '.' then -- ".." exit; end if; end if; word := word & Element( command, lastpos ); lastpos := lastpos+1; exit when lastpos > length( command ); end loop; end if; id := eof_t; lastpos := lastpos - 1; --word := To_Unbounded_string( slice( command, cmdpos, lastpos ) ); cmdpos := lastpos+1; for i in reverse 1..reserved_top-1 loop -- SHOULD "keyword_top" BE ALL? if identifiers( i ).name = word and not identifiers( i ).deleted then id := i; exit; end if; end loop; -- end if; -- If a keyword was found compress it and change the context if id /= eof_t then -- found it? if id < reserved_top then -- reserved word? ci.compressedScript := ci.compressedScript & -- ASCII 128 + toHighASCII( id ); -- sym table pos else -- should not occur ci.compressedScript := ci.compressedScript & -- store store as an word & immediate_word_delimiter; -- immediate word end if; if id = alter_t then -- first word alter? ci.context := SQLStatement; -- treat as SQL elsif id = insert_t then -- insert? ci.context := SQLStatement; -- treat as SQL elsif id = select_t then -- select? ci.context := SQLStatement; -- treat as SQL elsif id = update_t then -- update? ci.context := SQLStatement; -- treat as SQL elsif id = delete_t then -- delete? ci.context := StartOfDeleteParameters; -- SQL or Ada elsif id = env_t then -- env? ci.context := StartOfParameters; -- Shell or Ada elsif id = typeset_t then -- typeset? ci.context := adaScriptStatement; -- treat as Ada elsif id = unset_t then -- unset? ci.context := StartOfParameters; -- Shell or Ada elsif id = trace_t then -- trace? ci.context := StartOfParameters; -- Shell or Ada elsif id = help_t then -- help? ci.context := StartOfParameters; -- Shell or Ada elsif id = clear_t then -- clear? ci.context := StartOfParameters; -- Shell or Ada elsif id = jobs_t then -- job? ci.context := StartOfParameters; -- Shell or Ada elsif id = logout_t then -- logout? ci.context := StartOfParameters; -- Shell or Ada elsif id = pwd_t then -- pwd? ci.context := StartOfParameters; -- Shell or Ada elsif id = cd_t then -- cd? ci.context := StartOfParameters; -- Shell or Ada elsif id = history_t then -- history? ci.context := StartOfParameters; -- Shell or Ada elsif id = wait_t then -- wait? ci.context := StartOfParameters; -- Shell or Ada elsif id = step_t then -- step? ci.context := StartOfParameters; -- Shell or Ada -- elsif id = template_t then -- template? -- ci.context := StartOfParameters; -- Shell or Ada elsif id = begin_t then -- begin? ci.context := startOfStatement; -- don't know elsif id = is_t then -- is? ci.context := isPart; -- ci.context := startOfStatement; -- don't know elsif id = then_t then -- then? ci.context := startOfStatement; -- don't know elsif id = record_t then -- record? ci.context := startOfStatement; -- don't know elsif id = loop_t then -- loop? ci.context := startOfStatement; -- don't know -- THIS MAY BREAK LOOP LOOPS else -- otherwise ci.context := adaScriptStatement; -- assume Ada end if; else -- not reserved? ci.compressedScript := ci.compressedScript & -- store store as an word & immediate_word_delimiter; -- immediate word ci.context := StartOfParameters; -- Shell or Ada end if; else ci.context := StartOfParameters; end if; end startOfStatementByteCode; ----------------------------------------------------------------------------- -- LINE 2 BYTE CODE -- -- Receive a new line of a script and start/continue compiling the script -- into byte code (compressed tokens). ci is the context the text occurs in -- (e.g. were we working on SQL, Shell or an AdaScript statement, etc.) ----------------------------------------------------------------------------- procedure line2ByteCode( ci : in out compressionInfo; command : unbounded_string ) is tabAdjust : natural := 0; begin -- Next Line -- -- Remember: zero is reserved for end of line so count ends at 254+1 nextByteCodeLine; -- SOURCE IDENTIFICATION -- -- Start of line has file number (8-bit) and line number (16-bit). Each -- number is plus one to avoid ASCII 0 (reserved for end of lines). ci.compressedScript := ci.compressedScript & character'val( SourceFileNo+1 ); ci.compressedScript := ci.compressedScript & character'val( SourceLineNoLo+1 ); ci.compressedScript := ci.compressedScript & character'val( SourceLineNoHi+1 ); -- BLANK LINES -- -- A blank line ASCII 1 byte (no indentation) and an ASCII 0 byte (end of -- line). if length( command ) = 0 then ci.compressedScript := ci.compressedScript & ASCII.SOH & ASCII.NUL; return; end if; -- INDENT COMPRESSION -- -- The third character of the line is the indentation white space byte (plus -- one to avoid ASCII 0). (Tab stops are treated as 8 spaces.) cmdpos := 1; if Element( command, cmdpos ) = ' ' or Element( command, cmdpos ) = ASCII.HT then while Element( command, cmdpos ) = ' ' or Element( command, cmdpos ) = ASCII.HT loop if Element( command, cmdpos ) = ASCII.HT then tabAdjust := tabAdjust + 8 - ((cmdpos+tabAdjust) mod 8); end if; cmdpos := cmdpos + 1; exit when cmdpos > length( command ); -- ignore if at end of line end loop; lastpos := cmdpos-1; -- actually, first non-whitespace char if lastpos+tabAdjust > 254 then -- hopefully, never, but harmless lastpos := 254-tabAdjust; -- to truncate leading indentation end if; ci.compressedScript := ci.compressedScript & character'val( lastpos+1+tabAdjust ); firstpos := cmdpos; -- first non- lastpos := cmdpos; -- white char else -- no indent? ci.compressedScript := ci.compressedScript & ASCII.SOH; -- then ASCII 1 end if; -- Empty Command Line if cmdpos > length( command ) then ci.compressedScript := ci.compressedScript & ASCII.NUL; return; end if; -- BYTE CODE GENERATION -- -- Use the appropriate compiler for the context. <> if wasSIGINT then wasSIGINT := false; done := true; -- stop parsing exit_block := true; -- exit any block done_sub := false; -- only leaving subprogram if trace then -- tracing? explain put_trace( "Terminating" ); end if; return; end if; case ci.context is when startOfStatement => startOfStatementByteCode( ci, command ); when startOfParameters => startOfParametersByteCode( ci, command ); when startOfDeleteParameters => startOfDeleteParametersByteCode( ci, command ); when adaScriptStatement => adaScriptStatementByteCode( ci, command ); when shellStatement => shellStatementByteCode( ci, command ); when SQLStatement => SQLStatementByteCode( ci, command ); when ISPart => ISByteCode( ci, command ); when others => err_tokenize( "don't know how to handle compressionInfo context", to_string( command ) ); end case; -- Line not finished? if cmdpos <= length( command ) then goto next; end if; -- END OF LINE -- -- ASCII zero is the end of line character ci.compressedScript := ci.compressedScript & ASCII.NUL; -- add ASCII 0 -- put_line( "new_line2ByteCode final: " & toescaped( ci.compressedScript ) ); end line2ByteCode; ----------------------------------------------------------------------------- -- BEGIN BYTE CODE -- -- Part of Byte Code Generator -- Write the EOF header line to complete an AdaScript script. ----------------------------------------------------------------------------- procedure beginByteCode( ci : in out compressionInfo ) is pragma suppress( RANGE_CHECK ); -- GCC 3.3.3 (Fedora Core 2) falsely says overflow begin ci.compressedScript := null_unbounded_string; -- Script Header: 2 bytes ci.compressedScript := ci.compressedScript & ASCII.SOH; -- version 1 ci.compressedScript := ci.compressedScript & ASCII.NUL; -- reserved -- EOF Leader Line: 6 bytes ci.compressedScript := ci.compressedScript & ASCII.SOH; -- file number ci.compressedScript := ci.compressedScript & ASCII.SOH; -- line number low ci.compressedScript := ci.compressedScript & ASCII.SOH; -- line number high ci.compressedScript := ci.compressedScript & ASCII.SOH; -- indent ci.compressedScript := ci.compressedScript & toHighASCII( eof_t ); ci.compressedScript := ci.compressedScript & ASCII.NUL; -- EOL marker end beginByteCode; ----------------------------------------------------------------------------- -- END BYTE CODE -- -- Part of Byte Code Generator -- Write the EOF trailer line to complete an AdaScript script. ----------------------------------------------------------------------------- procedure endByteCode( ci : in out compressionInfo ) is pragma suppress( RANGE_CHECK ); -- GCC 3.3.3 (Fedora Core 2) falsely says overflow begin -- EOF Trailer Line: 6 bytes ci.compressedScript := ci.compressedScript & ASCII.SOH; -- trailing sent. ci.compressedScript := ci.compressedScript & character'val( SourceLineNoLo+1 ); ci.compressedScript := ci.compressedScript & character'val( SourceLineNoHi+1 ); ci.compressedScript := ci.compressedScript & ASCII.SOH; ci.compressedScript := ci.compressedScript & toHighASCII( eof_t ); ci.compressedScript := ci.compressedScript & ASCII.NUL; end endByteCode; ----------------------------------------------------------------------------- -- DUMP BYTE CODE -- -- Display the compressed script to standard output in a readable form. ----------------------------------------------------------------------------- procedure dumpByteCode( ci : compressionInfo ) is line : integer := 0; begin put_line( "--- Byte Code dump ---------------------------------------------------" ); put( " H: 1:" ); put( ToEscaped( to_unbounded_string( "" & Element( ci.compressedScript, 1 ) ) ) ); put( ToEscaped( to_unbounded_string( "" & Element( ci.compressedScript, 2 ) ) ) ); put_line( " Byte Code Version: " & character'pos( Element( ci.compressedScript, 1 ) )'img ); put( " 0: 3:" ); for i in 3..length( ci.compressedScript ) loop put( ToEscaped( to_unbounded_string( "" & Element( ci.compressedScript, i ) ) ) ); if Element( ci.compressedScript, i ) = ASCII.NUL then line := line + 1; if i /= length( ci.compressedScript ) then new_line; if wasSIGINT then wasSIGINT := false; done := true; -- stop parsing exit_block := true; -- exit any block done_sub := false; -- only leaving subprogram if trace then -- tracing? explain put_trace( "Terminating" ); end if; exit; end if; put( line, width => 3 ); put( ":" ); put( i+1, width => 4 ); put( ":" ); end if; end if; end loop; new_line; put_line( "Byte Code Size =" & length( ci.compressedScript )'img ); end dumpByteCode; ----------------------------------------------------------------------------- -- COMPILE COMMAND OR TEMPLATE -- -- Compile into byte code a command typed interactively at the command prompt -- or backquotes or templates. ----------------------------------------------------------------------------- procedure compileCommandOrTemplate( command : unbounded_string ) is ci : compressionInfo; linePos : integer; firstLinePos : integer; lastLinePos : integer; line2compile : unbounded_string; begin if script /= null then -- discard script free( script ); end if; cmdpos := firstScriptCommandOffset; -- Reset cmdpos to beginning of script beginByteCode( ci ); -- Find lines and compress each in turn linePos := 1; firstLinePos := linePos; -- Null command? if linepos > length( command ) then line2ByteCode( ci, null_unbounded_string ); end if; -- Multiple lines while linepos <= length( command ) and not error_found loop -- anything left? loop exit when element( command, linePos ) = ASCII.LF; -- UNIX/Linux EOL exit when element( command, linePos ) = ASCII.CR; -- DOS/Apple EOL linePos := linePos + 1; -- next character exit when error_found; -- quit on err exit when linePos > length( command ); -- if not beyond EOF end loop; lastLinePos := linePos - 1; -- back up one line2compile := to_unbounded_string( slice( command, firstLinePos, lastLinePos ) ); line2ByteCode( ci, line2compile ); -- compress that slice -- DOS text files have CR+LF if linePos < length( command ) then if element( command, linePos ) = ASCII.CR then if element( command, linePos+1 ) = ASCII.LF then -- skip extra LF linePos := linePos + 1; end if; end if; end if; linePos := linePos+1; -- skip term char firstLinePos := linePos; end loop; nextByteCodeLine; endByteCode( ci ); -- Verbose? Show the byte code if verboseOpt then dumpByteCode( ci ); end if; script := new string( 1..length( ci.compressedScript ) ); -- alloc script script.all := to_string( ci.compressedScript ); -- and copy identifiers( source_info_script_size_t ).value := delete( to_unbounded_string( script.all'length'img), 1, 1 ); end compileCommandOrTemplate; ----------------------------------------------------------------------------- -- COMPILE TEMPLATE -- -- Compile into byte code a command from a template. Set the line number -- to reflect the template file line number. ----------------------------------------------------------------------------- procedure compileTemplate( command : unbounded_string; lineno : natural ) is begin setByteCodeLine( lineno ); compileCommandOrTemplate( command ); end compileTemplate; ----------------------------------------------------------------------------- -- COMPILE COMMAND -- -- Compile into byte code a command typed interactively at the command prompt -- or backquotes or templates. ----------------------------------------------------------------------------- procedure compileCommand( command : unbounded_string ) is begin SourceLineNoLo := 0; SourceLineNoHi := 0; compileCommandOrTemplate( command ); end compileCommand; ----------------------------------------------------------------------------- -- COMPILE SCRIPT -- -- Compile into byte code a command loaded from a script file. ----------------------------------------------------------------------------- procedure compileScript( firstLine : unbounded_string ) is ci : compressionInfo; newstr : unbounded_string; command : aliased unbounded_string := firstLine; compileDone : boolean := false; lastLineNumber : natural := 0; begin SourceLineNoLo := 0; SourceLineNoHi := 0; if script /= null then -- discard script free( script ); end if; cmdpos := firstScriptCommandOffset; -- Reset cmdpos to beginning of script SourceLineNoLo := 0; -- Reset line number SourceLineNoHi := 0; beginByteCode( ci ); -- parser loads first line...check for "#!" signature line and ignore it if length( command ) > 0 then if element( command, 1 ) = '#' then compileDone := not LineRead( command'access ); -- quit when done SourceLineNoLo := 1; -- skip 1 line end if; end if; if verboseOpt then put_line( standard_error, "=> (Line 1 ...)" ); end if; -- compile the script into byte code while not compileDone loop -- for all lines if verboseOpt then if getByteCodeLineNo >= lastLineNumber + 500 then lastLineNumber := getByteCodeLineNo; put_line( standard_error, to_string( term( up ) & "=> (Line" & lastLineNumber'img & " ...)" ) ); end if; end if; line2ByteCode( ci, command ); -- compress line exit when error_found; -- quit on err compileDone := not LineRead( command'access ); -- quit when done end loop; nextByteCodeLine; endByteCode( ci ); -- Verbose? Show the byte code if verboseOpt then dumpByteCode( ci ); end if; script := new string( 1..length( ci.compressedScript ) ); -- alloc script script.all := to_string( ci.compressedScript ); -- and copy identifiers( source_info_script_size_t ).value := delete( to_unbounded_string( script.all'length'img), 1, 1 ); end compileScript; ----------------------------------------------------------------------------- -- COPY BYTE CODE LINES -- copy the byte code lines containing point1 through point2. ----------------------------------------------------------------------------- function copyByteCodeLines( point1, point2 : natural ) return string is line_firstpos : natural; -- start of compiled lines line_lastpos : natural; -- end of compiled lines begin -- Script unexpectedly null? Print a message an let an exception be raised -- later. if script = null then put_line( standard_error, "internal_error: copyByteCodeLines: script is null" ); return ""; end if; -- Invalid range test if point1 > point2 then put_line( standard_error, "internal error: copyByteCodeLines: point1 " & point1'img & " is greater than point2 " & point2'img ); return ""; end if; -- point2 has an insane value? Print a message and let an exception be -- raised later. if point2 > script'length then put_line( standard_error, "internal_error: copyByteCodeLines: cmdpos " & cmdpos'img & " is greater than length of script " & script'length'img ); return ""; end if; -- Prepare to find the start and end of the command line line_firstpos := point1; line_lastpos := point2; -- find beginning and end of command line -- (as it appears in the byte code) if line_firstpos > 1 then -- sane value? line_firstpos := line_firstpos - 1; -- search for previous while script( line_firstpos ) /= ASCII.NUL loop -- ASCII.NUL line_firstpos := line_firstpos - 1; end loop; end if; if line_lastpos <= script'length then -- sane value? while script( line_lastpos ) /= ASCII.NUL loop -- look for next line_lastpos := line_lastpos + 1; -- ASCII.NUL end loop; -- or this one if end if; -- on one line_firstpos := line_firstpos + 1; -- cut the lines return script( line_firstpos..line_lastpos ); end copyByteCodeLines; ----------------------------------------------------------------------------- -- CREATE USER-DEFINED BYTE CODE -- -- Take byte code compiled in the script for a user-defined procedure or -- function and add the necessary header/trailer code to make it a complete, -- stand-alone script. (Without this, the scanner can become confused. For -- example, err will not be able to find the start of the first line.) ----------------------------------------------------------------------------- --function createUserDefinedByteCode( byteCode : string ) return -- unbounded_string is -- ci : compressionInfo; --begin -- beginByteCode( ci ); -- add the start -- ci.compressedScript := ci.compressedScript & byteCode; -- compiled b.c. -- endByteCode( ci ); -- add the end -- return ci.compressedScript; -- return it --end createUserDefinedByteCode; ----------------------------------------------------------------------------- -- REPLACE SCRIPT WITH FRAGMENT -- -- Switch the current byte code script with another byte code script string. -- The byte code is assumed to be a piece of byte code and requires the -- headers/trailers added to the byte code. ----------------------------------------------------------------------------- procedure replaceScriptWithFragment( bytecode : unbounded_string ) is ci : compressionInfo; begin if script /= null then -- discard script free( script ); end if; cmdpos := firstScriptCommandOffset; -- Reset cmdpos to beginning of script SourceLineNoLo := 0; -- Reset line number SourceLineNoHi := 0; beginByteCode( ci ); ci.compressedScript := ci.compressedScript & bytecode; endByteCode( ci ); --if verboseOpt then -- dumpByteCode( ci ); --end if; script := new string( 1..length( ci.compressedScript ) ); -- alloc script script.all := to_string( ci.compressedScript ); -- and copy -- identifiers( source_info_script_size_t ).value := delete( to_unbounded_string( script.all'length'img), 1, 1 ); end replaceScriptWithFragment; ----------------------------------------------------------------------------- -- REPLACE SCRIPT -- -- Switch the current byte code script with another byte code script string. ----------------------------------------------------------------------------- procedure replaceScript( bytecode : unbounded_string ) is ci : compressionInfo; begin if script /= null then -- discard script free( script ); end if; ci.compressedScript := ci.compressedScript & bytecode; --if verboseOpt then -- dumpByteCode( ci ); --end if; script := new string( 1..length( ci.compressedScript ) ); -- alloc script script.all := to_string( ci.compressedScript ); -- and copy -- identifiers( source_info_script_size_t ).value := delete( to_unbounded_string( script.all'length'img), 1, 1 ); end replaceScript; end scanner;