/* asm.c: primitive redcode assembler * $Id: asm.c,v 1.2 2003/06/09 12:18:48 martinus Exp $ */ /* This file is part of `exhaust', a memory array redcode simulator. * Copyright (C) 2002 M Joonas Pihlaja * Public Domain. */ /* The format of lines with instructions should be: * * [START] OPCODE.MODIFIER A-MODE INT , B-MODE INT * * The ORG pseudo-op is ignored, as is the label after an optional * END (if given). The only label recognised is START. No fuss over * the amount of white space, as long as it exists where required. * * Comments are recognised and discarded as is any line starting * with "Program". The output from `pmars -r 0 Your_Real_Source.red' * should assemble fine with this tiny assembler. * * * Functions in this file: * * asm_line(), asm_file(), asm_fname(), dis1(), * discore() */ #include #ifdef SYSV #include #else #include #endif #include #include #include "exhaust.h" #include "insn.h" #include "asm.h" /* str_tok_t: container for tokens we identify. */ typedef struct str_toks_st { char *s; /* name of token */ int c; /* token code */ } str_toks_t; /* Data * * tok_buf[]: globally used to keep the contents of string tokens * tok_int: if the token was a TOK_INT, the value of the token is here * * str_toks[]: table of multicharacter tokens we identify * */ #define MAX_ALL_CHARS 256 static char tok_buf[MAX_ALL_CHARS]; static int tok_int; static str_toks_t str_toks[] = { { "DAT", TOK_DAT }, /* opcodes */ { "SPL", TOK_SPL }, { "MOV", TOK_MOV }, { "DJN", TOK_DJN }, { "ADD", TOK_ADD }, { "JMZ", TOK_JMZ }, { "SUB", TOK_SUB }, { "MOD", TOK_MOD }, { "CMP", TOK_SEQ }, { "SEQ", TOK_SEQ }, { "JMP", TOK_JMP }, { "JMN", TOK_JMN }, { "SNE", TOK_SNE }, { "MUL", TOK_MUL }, { "DIV", TOK_DIV }, { "SLT", TOK_SLT }, { "NOP", TOK_NOP }, { "LDP", TOK_LDP }, { "STP", TOK_STP }, { "ORG", TOK_ORG }, /* pseudo-ops */ { "END", TOK_END }, { "PIN", TOK_PIN }, { "START", TOK_START }, { "F", TOK_mF }, /* modifiers */ { "A", TOK_mA }, { "B", TOK_mB }, { "AB", TOK_mAB }, { "BA", TOK_mBA }, { "X", TOK_mX }, { "I", TOK_mI }, { NULL, 0 } /* sentinel */ }; /* NAME * get_tok -- read the next token from a string * * SYNOPSIS * const char *get_tok( const char *s, int *tok ); * * INPUTS * s -- string to read token from * tok -- where we store the token code of the read token * * RESULTS * The token code of the read token is stored into *tok, * with 0 signifying end of input. * * If the token was an integer, its value is stored into * the global `tok_int'. Integers may be in any base >= 10 * as according to strtol(). * * String tokens are converted to upper case when storing * them into the global `tok_str[]'. They are concatenated * at 255 characters. * * RETURN VALUE * Pointer to the character past the read token, or * to the nul character if at end of input. * * GLOBALS * tok_buf[] -- a string or char token is copied here * tok_int -- the value of an integer token * str_toks[] -- used to identify string tokens */ /* skip_white(): returns ptr. to next non-whitespace char in s */ static const char * skip_white(char const *s) { while ( isspace(*s) ) s++; return s; } static const char * get_tok(const char * s, int *tok ) { char *tok_str = tok_buf; int i; s = skip_white(s); if ( *s == 0 ) return (*tok = 0, s); /* * Tokenize strings. * * String tokens must start with a letter and consist of * letters, digits, and underscores. Strings are * converted to upper case. */ tok_buf[1] = tok_buf[0] = 0; i = 0; if ( isalpha(*s) ) while ( (isalnum(*s) || *s == '_') && ++i < MAX_ALL_CHARS ) *tok_str++ = toupper(*s++); *tok_str = 0; if ( tok_str > tok_buf ) { /* * was a string token -- identify it by searching through * the str_toks[] array. */ for ( i = 0; str_toks[i].s ; i++ ) { if ( 0 == strcmp( str_toks[i].s, tok_buf ) ) { *tok = str_toks[i].c; return s; } } *tok = TOK_STR; /* normal string, not special */ return s; } /* * Tokenize ints. * Must match /-?[0-9]/ */ if ( isdigit(*s) || ( *s == '-' && isdigit(*(s+1)) )) { char *endptr; tok_int = strtol( s, &endptr, 0 ); *tok = TOK_INT; return endptr; } /* * Tokenize addressing modes and pass single chars */ tok_buf[0] = *s; /* store char value as single */ tok_buf[1] = 0; /* char string. */ switch ( *tok = *s++ ) { case '$': *tok = TOK_DIRECT; break; case '#': *tok = TOK_IMMEDIATE; break; case '*': *tok = TOK_AINDIRECT; break; case '@': *tok = TOK_BINDIRECT; break; case '{': *tok = TOK_APREDEC; break; case '<': *tok = TOK_BPREDEC; break; case '}': *tok = TOK_APOSTINC; break; case '>': *tok = TOK_BPOSTINC; break; } return s; } /* NAME * panic_bad_token -- issue an error message for a bad token and exit(1) * * SYNOPSIS * void panic_bad_token( int tok, const char *expected ); * * INPUTS * tok -- token code of unexpected token * expected -- a string describing what kind of token * was expected. e.g. "a modifier". * * RESULTS * A message Informing the user of the unexpected token, * its possible semantic value, and what type of token * was expected instead. * * GLOBALS * tok_buf, tok_int -- if the token has semantic value we look * for it here. * BUGS * The error message should be much better -- not even location * in the source is given here. *sigh* */ static void panic_bad_token(int tok, const char* expected ) { char *errstr = NULL; char buf[30]; memset(buf, 0, 30); /* make an errstr */ if ( tok_buf[0] ) errstr = tok_buf; if ( tok == TOK_INT ) { sprintf(buf, "%d", tok_int ); errstr = buf; } /* complain and exit with error code */ fprintf(stderr, "token '%s' not %s\n", errstr, expected ); exit(1); } /* NAME * asm_line -- assemble a line to an instruction * * SYNOPSIS * int asm_line( const char *line, insn_t *in, unsigned int CORESIZE ); * * INPUTS * line -- line to assemble * in -- instruction to assemble into * CORESIZE -- size of core * * RESULTS * If there was anything to assemble, it is assembled into * `in'. If there was a START label, the corresponding flag * is set in the instructions flags. Incomplete or erroneous * input prompt a quick error message and exit(1). * * If the 'ORG start-address' construct is encountered where * `start-address' is an integer, then the `in->a' field contains * the offset in instructions from the start of the warrior * where the warrior should start execution. * * If 'PIN id' is encountered, where `id' is an integer, then the * `in->a' field contains the `id'. * * RETURN VALUE * ASMLINE_PIN : pseudo-op 'PIN' encountered, id saved in `in->a'. * ASMLINE_ORG : pseudo-op 'ORG' encountered, warrior start * saved in `in->a'. * ASMLINE_DONE : done assembling, END opcode found, nothing assembled. * ASMLINE_NONE : nothing to assemble on this line. * ASMLINE_OK : assembled instruction into `in' OK. * * GLOBALS * tok_int, tok_buf[], str_toks[] somewhere down the line. */ int asm_line(const char * line, insn_t * in, unsigned int CORESIZE) { const char *s = line; int tok; int flags = 0; int op, m, ma, mb; /* opcode, modifier, a-mode, b-mode */ s = get_tok( s, &tok ); if ( tok == 0 ) return ASMLINE_NONE; /* * Ignore string lines '^Program.*' and comments. */ if ( tok == TOK_STR && 0 == strcmp( "PROGRAM", tok_buf )) { return ASMLINE_NONE; } if ( tok == ';' ) return ASMLINE_NONE; /* * Now match the instruction's various components: * [START label,] opcode, modifier, a-mode, a-value, b-mode, b-value */ /* Match possible start label */ if ( tok == TOK_START ) { flags |= fl_START; s = get_tok( s, &tok ); } /* Match opcode */ if ( is_tok_pseudoop(tok) ) { switch ( tok ) { case TOK_END: return ASMLINE_DONE; /* signal done assembling */ case TOK_ORG: s = get_tok( s, &tok ); /* get the next token */ if ( tok == TOK_START ) /* ignore: */ return ASMLINE_NONE; /* start label already matched and processed */ if ( tok != TOK_INT ) { panic_bad_token( tok, "an integer -- an int or \"START\" " "follows ORG" ); } in->a = tok_int; return ASMLINE_ORG; case TOK_PIN: s = get_tok( s, &tok ); if ( tok != TOK_INT ) { panic_bad_token( tok, "an integer -- PIN must be an unsigned integer"); } in->a = tok_int; return ASMLINE_PIN; default: panic_bad_token( tok, "a pseudo-op (internal assembler error)" ); } } if (!( is_tok_opcode(tok))) panic_bad_token( tok, "an opcode" ); op = DAT; switch(tok) { case TOK_DAT: op = DAT; break; case TOK_SPL: op = SPL; break; case TOK_MOV: op = MOV; break; case TOK_JMP: op = JMP; break; case TOK_JMZ: op = JMZ; break; case TOK_JMN: op = JMN; break; case TOK_ADD: op = ADD; break; case TOK_SUB: op = SUB; break; case TOK_SEQ: op = SEQ; break; case TOK_SNE: op = SNE; break; case TOK_MUL: op = MUL; break; case TOK_DIV: op = DIV; break; case TOK_DJN: op = DJN; break; case TOK_SLT: op = SLT; break; case TOK_MOD: op = MODM; break; case TOK_NOP: op = NOP; break; case TOK_LDP: op = LDP; break; case TOK_STP: op = STP; break; default: panic_bad_token( tok, "an opcode" ); } /* Match modifier */ s = get_tok( s, &tok ); /* first the '.' */ if ( tok != '.' ) panic_bad_token( tok, "'.'" ); s = get_tok( s, &tok ); /* then the modifier itself */ if ( ! is_tok_modifier(tok) ) panic_bad_token( tok, "a modifier"); m = tok - TOK_mF; /* Match a-field addressing mode and a-field */ s = get_tok( s, &tok ); if ( ! is_tok_mode(tok) ) panic_bad_token( tok, "an addressing mode specifier"); ma = tok - TOK_DIRECT; s = get_tok( s, &tok ); if ( tok != TOK_INT ) panic_bad_token( tok, "an integer"); in->a = MODS(tok_int,CORESIZE); /* Match comma */ s = get_tok( s, &tok ); if ( tok != ',' ) panic_bad_token( tok, "','" ); /* Match b-field addressing mode and a-field */ s = get_tok( s, &tok ); if ( ! is_tok_mode(tok) ) panic_bad_token( tok, "an addressing mode specifier"); mb = tok - TOK_DIRECT; s = get_tok( s, &tok ); if ( tok != TOK_INT ) panic_bad_token( tok, "an integer"); in->b = MODS(tok_int,CORESIZE); /* * Set flags and ignore the rest of the line */ in->in = (flags << flPOS) | OP( op, m, ma, mb ); return ASMLINE_OK; } /* NAME * asm_file, asm_fname -- assemble a FILE into a warrior * * SYNOPSIS * void asm_file( FILE *F, warrior_t *w, unsigned int CORESIZE ); * void asm_fname( const char *filename, warrior_t *w, * unsigned int CORESIZE ); * * INPUTS * w -- warrior_t to assemble into. * F -- stream to read warrior source from * filename -- path to source file. May be '-' * which is interpreted as stdin. * CORESIZE -- just that * * DESCRIPTION * These functions assemble a source file into a * warrior_t setting all the non-info fields. * * RESULTS * If the warrior assembled correctly, then warrior_t * contains its code and starting offset. If an error * occured during assembly, an error message is issued * and the program exit()s. * * GLOBALS * none as such, subroutines use tok_buf[], tok_int, str_toks[], * MAXLENGTH constant * * SEE ALSO * asm_line() * * BUGS * Its not really acceptable to exit() on an assembly error. */ void asm_file(FILE * F, warrior_t *w, unsigned int CORESIZE) { char line[MAX_ALL_CHARS]; insn_t *c; int ret; /* return code from asm_line() */ w->len = w->start = 0; w->have_pin = 0; w->pin = 0; c = w->code; while ( fgets(line, MAX_ALL_CHARS, F) ) { ret = asm_line( line, c, CORESIZE ); if ( ret == ASMLINE_DONE ) break; switch ( ret ) { case ASMLINE_OK: if ( get_flags( c->in ) & fl_START ) w->start = w->len; if ( w->len < MAXLENGTH) c++; w->len++; break; case ASMLINE_ORG: w->start = c->a; /* was `ORG int', get the starting address */ break; case ASMLINE_NONE: break; /* nop */ case ASMLINE_PIN: w->have_pin = 1; w->pin = c->a; /* save PIN. */ break; default: fprintf(stderr,"asm.c/asm_file(): illegal return code from asm_line()\n"); exit(1); } if ( w->len > MAXLENGTH ) { fprintf(stderr, "too many instructions in warrior %d\n", w->no); exit(1); } } if ( w->start >= w->len ) { fprintf(stderr, "starting address must be inside warrior body\n" ); exit(1); } } void asm_fname(const char * fname, warrior_t *w, unsigned int CORESIZE) { FILE *F; int is_stdin = 0; if ( strcmp( fname, "-" ) == 0 ) { F = stdin; is_stdin = 1; } else if (!( F = fopen(fname, "r") )) { fprintf(stderr, "can't open file %s\n", fname); exit(1); } asm_file(F, w, CORESIZE); if ( !is_stdin ) fclose(F); } /* NAME * dis1 -- disasemble an instruction * discore -- disasemble a segment of core * * SYNOPSIS * void dis1( char *s, inst_t in, unsigned int CORESIZE ); * void discore( inst_t *core, int start, int end, * unsigned int CORESIZE ); * * INPUTS * s -- string to print disassembled instruction to. A string * of length 60 should be more than sufficient. * in -- instruction to disassemble * core -- pointer to start of core * start -- core segment start offset * end -- core segment end offset (excluded) * * RESULTS * dis1 -- The disassembled instruction is printed to `s'. * discore -- A segment of core is dissasembled and printed * to stdout with core addresses. */ void dis1(char* s, insn_t in, unsigned int CORESIZE) { int x; char *op_s, *mo_s, *ma_s, *mb_s; int af, bf; x = (in.in >> opPOS) & opMASK; switch( x ) { case DAT: op_s = "dat"; break; case SPL: op_s = "spl"; break; case MOV: op_s = "mov"; break; case JMP: op_s = "jmp"; break; case JMZ: op_s = "jmz"; break; case JMN: op_s = "jmn"; break; case ADD: op_s = "add"; break; case SUB: op_s = "sub"; break; case SEQ: op_s = "seq"; break; case SNE: op_s = "sne"; break; case MUL: op_s = "mul"; break; case DIV: op_s = "div"; break; case DJN: op_s = "djn"; break; case SLT: op_s = "slt"; break; case MODM: op_s = "mod"; break; case NOP: op_s = "nop"; break; case LDP: op_s = "ldp"; break; case STP: op_s = "stp"; break; default: op_s = "???"; } x = (in.in >> moPOS) & moMASK; switch ( x ) { case mF: mo_s = "f "; break; case mA: mo_s = "a "; break; case mB: mo_s = "b "; break; case mAB: mo_s = "ab"; break; case mBA: mo_s = "ba"; break; case mX: mo_s = "x "; break; case mI: mo_s = "i "; break; default: mo_s = "?"; } x = (in.in >> maPOS) & mMASK; switch (x) { case DIRECT: ma_s = "$"; break; case IMMEDIATE: ma_s = "#"; break; case AINDIRECT: ma_s = "*"; break; case BINDIRECT: ma_s = "@"; break; case APREDEC: ma_s = "{"; break; case APOSTINC: ma_s = "}"; break; case BPREDEC: ma_s = "<"; break; case BPOSTINC: ma_s = ">"; break; default: ma_s = "?"; } x = (in.in >> mbPOS) & mMASK; switch (x) { case DIRECT: mb_s = "$"; break; case IMMEDIATE: mb_s = "#"; break; case AINDIRECT: mb_s = "*"; break; case BINDIRECT: mb_s = "@"; break; case APREDEC: mb_s = "{"; break; case APOSTINC: mb_s = "}"; break; case BPREDEC: mb_s = "<"; break; case BPOSTINC: mb_s = ">"; break; default: mb_s = "?"; } af = in.a <= CORESIZE/2 ? in.a : in.a - CORESIZE; bf = in.b <= CORESIZE/2 ? in.b : in.b - CORESIZE; sprintf(s,"%s.%s %s%5d , %s%5d", op_s, mo_s, ma_s, af, mb_s, bf); } void discore(insn_t* core, int start, int end, unsigned int CORESIZE ) { int adr; char line[MAX_ALL_CHARS]; for ( adr = start; adr < end; adr++ ) { dis1( line, core[adr], CORESIZE ); printf("%4d %s\n", adr, line); } }