//  crm114_structs.h  - Controllable Regex Mutilator structures, version X0.1
//  Copyright 2001 William S. Yerazunis, all rights reserved.
//  
//  This software is licensed to the public under the Free Software
//  Foundation's GNU GPL, version 1.0.  You may obtain a copy of the
//  GPL by visiting the Free Software Foundation's web site at
//  www.fsf.org .  Other licenses may be negotiated; contact the 
//  author for details.  
//
//   Run-time configuration and state globals.
//
//   NOTE(review): these are (tentative) definitions, not "extern"
//   declarations.  If this header is included from more than one
//   translation unit, linking depends on the toolchain merging common
//   symbols; GCC 10 and later default to -fno-common, which turns that
//   into a multiple-definition error.  Confirm how the build handles
//   this before converting them to extern.
//
long vht_size;

long cstk_limit;

long max_pgmlines;

long max_pgmsize;

long user_trace;

long internal_trace;

long debug_countdown;

long cmdline_break;

long cycle_counter;

long ignore_environment_vars;

long data_window_size;

long sparse_spectrum_file_length;

long microgroom_chain_length;

long microgroom_stop_after;

float min_pmax_pmin_ratio;

long profile_execution;

//  listing style: 0 = none, 1 = basic, 2 = expanded, 3 = parsecode
long prettyprint_listing;

//  All internal errors will use this number or higher;
//  the user programs can use lower numbers freely.
long engine_exit_base;


//        how should math be handled?
//        = 0 no extended (non-EVAL) math, use algebraic notation
//        = 1 no extended (non-EVAL) math, use RPN
//        = 2 extended (everywhere) math, use algebraic notation
//        = 3 extended (everywhere) math, use RPN
long q_expansion_mode;


//   Variable hash table (VHT) cell.
//
//   Each file also gets an entry, with the name of the file being the
//   name of the variable - no colons!
//
//   There is no "next" pointer in a VHT cell, because overflow is handled
//   in-table: if a table entry is in use, the next available table entry
//   is used, wrapping around.  That is easy to change in any case.
//
//   vstart, vlen, mstart and mlen are all measured from the _start_ of
//   valtxt.
//   NOTE(review): the original comment also said "mstart relative to
//   vstart"; confirm which base mstart really uses.
//
typedef struct mythical_vht_cell {
  //  file where the variable was defined (or NULL)
  char *filename;
  //  file descriptor of the defining file (if any)
  int filedesc;

  //  block of text that hosts the variable name ...
  char *nametxt;
  //  ... index into nametxt of the start of the name ...
  long nstart;
  //  ... and the length of the name
  long nlen;

  //  text block that hosts the captured value
  char *valtxt;
  //  zero-based index of the start of the value (inclusive)
  long vstart;
  //  length of the captured value; vstart + vlen is where a NUL
  //  terminator could be put if one were wanted
  long vlen;

  //  zero-based start of the most recent match against this variable
  long mstart;
  //  length of that most recent match; mstart + mlen is where a NUL
  //  terminator could be put if one were wanted
  long mlen;

  //  line number of this variable (if known, else -1)
  long linenumber;
} VHT_CELL;

//   Argparse block: the parsed arguments of one statement.
//
//   It is filled in at run time.  In principle that could happen at
//   microcompile time, but var-expansion has to be done at statement
//   execution time, so the block is only filled in when it is first
//   needed and the result is then cached.
//
//   Each argument is a (start pointer, length) pair.
//   NOTE(review): the a/p/b/s prefixes presumably stand for the angles,
//   parens, boxes and slashes argument kinds - confirm against the parser.
//
typedef struct mythical_argparse_block {
  char *a1start;  long a1len;
  char *p1start;  long p1len;
  char *p2start;  long p2len;
  char *p3start;  long p3len;
  char *b1start;  long b1len;
  char *s1start;  long s1len;
  char *s2start;  long s2len;
  //  64-bit flag word (presumably the OR of the statement's CRM_* flags)
  long long sflags;
} ARGPARSE_BLOCK;



//  Microcompile table (MCT) cell - one such cell per statement.
//
//  These table entries get filled in during the microcompile operation.
//
typedef struct mythical_mct_cell {
  //  text file this statement lives in
  char *hosttxt;
  //  the argparse block for this statement
  ARGPARSE_BLOCK *apb;

  //  zero-based index of the start of the statement (inclusive)
  long start;
  //  zero-based index of the non-blank statement text (for prettyprint)
  long fchar;
  //  zero-based index of the start of the args
  long achar;

  //  user time spent in this statement line
  long stmt_utime;
  //  system time spent in this statement line
  long stmt_stime;

  //  statement type of this line
  int stmt_type;
  //  nesting level of this statement
  int nest_level;

  //  where execution would go if this statement failed
  int fail_index;
  //  where execution would go if this statement liafed
  int liaf_index;
  //  where execution would go if this statement faults
  int trap_index;

  //  1 if there is a "break" on this statement, 0 otherwise
  int stmt_break;
} MCT_CELL;

//  Control stack level (CSL) cell.
//
//  Nota Bene: CSL cells do double duty.  They retain the toplevel data
//  about any particular file being executed, and they also retain the
//  data on any file that is just data.  If a file is executable, mct
//  points to the compiled MCT table; otherwise mct is NULL and the file
//  is not executable.
//
typedef struct mythical_csl_cell {
  //  filename, if any
  char *filename;
  //  0 = readonly, 1 = rdwr
  long rdwr;
  //  file descriptor it's open on (if any)
  long filedes;

  //  text buffer
  char *filetext;
  //  characters of data we have
  long nchars;
  //  hash of this data (if done)
  unsigned long hash;

  //  microcompile (if compiled)
  MCT_CELL **mct;
  //  how many statements in the microcompile
  long nstmts;

  //  do we preload the window or not?
  long preload_window;
  //  currently executing statement of this file
  long cstmt;

  //  pointer to this file's caller (if any)
  void *caller;
  //  index into the VHT to stick the return value
  long return_vht_cell;
  //  how many calls deep this stack frame is
  long calldepth;

  //  the status stack for ALIUS
  long aliusstk[MAX_BRACKETDEPTH];
} CSL_CELL;

//  One feature bucket: three unsigned long words, in the order
//  hash, key, value.
typedef struct {
  unsigned long hash, key, value;
} FEATUREBUCKET_STRUCT;


//  Feature file header: a 4-byte version tag followed by two
//  unsigned long words (flags, then skip_to).
typedef struct {
  unsigned char version[4];
  unsigned long flags, skip_to;
} FEATURE_HEADER_STRUCT;


//  Winnow feature bucket: same hash/key words as FEATUREBUCKET_STRUCT,
//  but the value is a float.
typedef struct {
  unsigned long hash, key;
  float value;
} WINNOW_FEATUREBUCKET_STRUCT;

//  Length reserved for the entropy header.
//  NOTE(review): the unit is presumably bytes - confirm against the
//  entropy classifier code.
#define ENTROPY_RESERVED_HEADER_LEN 1024

//  Entropy header: start/length pairs for the "firlat" and node areas,
//  followed by a 64-bit running bit total.
typedef struct {
  long firlatstart, firlatlen;
  long nodestart, nodeslen;
  long long totalbits;
} ENTROPY_HEADER_STRUCT;

//  One alphabet slot of an entropy cell: a count and a next-cell value.
typedef struct mythical_entropy_alphabet_slot {
  long count, nextcell;
} ENTROPY_ALPHABET_SLOT;

//  Entropy cell: a small header (fir_prior .. firlat_slot) followed by
//  one ENTROPY_ALPHABET_SLOT per alphabet symbol.
//
//  Author's sizing note:
//  28 byte header, 24 bytes alph (52 tot).  Pare: 16 header, 16 alph (36 tot)
typedef struct mythical_entropy_cell {
  double fir_prior;
  long fir_larger, fir_smaller;
  long firlat_slot;
  //  (a "long total_count" member is deliberately left out here)
  ENTROPY_ALPHABET_SLOT abet[ENTROPY_ALPHABET_SIZE];
} ENTROPY_FEATUREBUCKET_STRUCT;


//  Process times, using the same member names as POSIX struct tms.
typedef struct mythical_tms_struct {
  //  this process: user time, then system time
  clock_t tms_utime, tms_stime;
  //  children: user time, then system time
  clock_t tms_cutime, tms_cstime;
} TMS_STRUCT;


//  Statement type codes assigned by the microcompiler.
//  The codes are consecutive, starting at 0.
//
#define CRM_BOGUS          0
#define CRM_NOOP           1
#define CRM_EXIT           2
#define CRM_OPENBRACKET    3
#define CRM_CLOSEBRACKET   4
#define CRM_LABEL          5
#define CRM_GOTO           6
#define CRM_MATCH          7
#define CRM_FAIL           8
#define CRM_LIAF           9
#define CRM_ACCEPT        10
#define CRM_TRAP          11
#define CRM_FAULT         12
#define CRM_INPUT         13
#define CRM_OUTPUT        14
#define CRM_WINDOW        15
#define CRM_ALTER         16
#define CRM_CALL          17
#define CRM_ROUTINE       18
#define CRM_RETURN        19
#define CRM_SYSCALL       20
#define CRM_LEARN         21
#define CRM_CLASSIFY      22
#define CRM_ISOLATE       23
#define CRM_HASH          24
#define CRM_INTERSECT     25
#define CRM_UNION         26
#define CRM_EVAL          27
#define CRM_ALIUS         28
#define CRM_TRANSLATE     29
#define CRM_DEBUG         30
//  make clusters out of tokens
#define CRM_CLUMP         31
//  pmulc translates tokens to cluster names
#define CRM_PMULC         32
#define CRM_UNIMPLEMENTED 33


//      FLAGS FLAGS FLAGS
//       all of the valid CRM114 flags are listed here
//
//      GROT GROT GROT - You must keep this in synchrony with the 
//      definitions of the keywords in crm_stmt_parser!!!  Yes, I'd
//      love to define it in one place and one place only, but I haven't
//      figured out a way to do that well.

//     Each flag is a single bit.  Flags are collected in 64-bit words
//     (ARGPARSE_BLOCK.sflags is long long, FLAG_DEF.value is unsigned
//     long long), so every flag from bit 31 upward is built from a
//     64-bit one (1LL).
//
//     match searchstart flags
#define CRM_FROMSTART     (1 << 0)
#define CRM_FROMNEXT      (1 << 1)
#define CRM_FROMEND       (1 << 2)
#define CRM_NEWEND        (1 << 3)
#define CRM_FROMCURRENT   (1 << 4)
//         match control flags
#define CRM_NOCASE        (1 << 5)
#define CRM_ABSENT        (1 << 6)
#define CRM_BASIC         (1 << 7)
#define CRM_BACKWARDS     (1 << 8)
#define CRM_LITERAL       (1 << 9)
#define CRM_NOMULTILINE   (1 << 10)      // should be merged with byline
//         input/output/window flags
#define CRM_BYLINE        (1 << 10)      //  Should be merged with nomultiline
#define CRM_BYCHAR        (1 << 11)
#define CRM_BYCHUNK       (1 << 12)
#define CRM_BYEOF         (1 << 13)
#define CRM_EOFACCEPTS    (1 << 14)
#define CRM_EOFRETRY      (1 << 15)
#define CRM_APPEND        (1 << 16)
//           process control flags
#define CRM_KEEP          (1 << 17)
#define CRM_ASYNC         (1 << 18)
//        learn and classify
#define CRM_REFUTE        (1 << 19)
#define CRM_MICROGROOM    (1 << 20)
#define CRM_MARKOVIAN     (1 << 21)
#define CRM_OSB_BAYES     (1 << 22)       // synonym with OSB feature gen
#define CRM_OSB           CRM_OSB_BAYES
#define CRM_CORRELATE     (1 << 23)
#define CRM_OSB_WINNOW    (1 << 24)      //  synonym to Winnow feature combiner
#define CRM_WINNOW        CRM_OSB_WINNOW
#define CRM_CHI2          (1 << 25)
#define CRM_UNIQUE        (1 << 26)
#define CRM_ENTROPY       (1 << 27)
#define CRM_OSBF          (1 << 28)     // synonym with OSBF local rule
#define CRM_OSBF_BAYES    CRM_OSBF
#define CRM_HYPERSPACE    (1 << 29)
#define CRM_UNIGRAM       (1 << 30)
//     Bit 31 needs the 64-bit one: (1 << 31) overflows a 32-bit int
//     (undefined behaviour), and the negative value it yields in practice
//     sign-extends when stored in a 64-bit flag word, which would also
//     switch on every flag defined above it.
#define CRM_CROSSLINK     (1LL << 31)
//
//        Flags that need to be sorted back in
//           input
#define CRM_READLINE      (1LL << 32)
//           isolate flags
#define CRM_DEFAULT       (1LL << 33)
//           SKS classifier
#define CRM_SKS           (1LL << 34)
//           SVM classifier
#define CRM_SVM           (1LL << 35)
//           FSCM classifier
#define CRM_FSCM          (1LL << 36)
//  
//     FLAG_DEF holds one flag definition: a string paired with a 64-bit
//     flag value.
typedef struct {
  char *string;
  unsigned long long value;
} FLAG_DEF;


//*****************************************************************
//
//     The following table describes the statements allowed in CRM114.
//
//      Each entry is one line of STMT_TABLE_TYPE, and gives the text
//      representation of the command, the internal dispatch code, 
//      whether the statement is "executable" or not, what the minimum
//      and maximum number of slash-groups, paren-groups, and box-groups
//      are for the statement to make sense, and what flags are allowed
//      for that statement.
//

//     One row of the statement table.
//
//     flags_allowed_mask is unsigned long long (the same type as
//     FLAG_DEF.value) because the CRM_* flags extend past bit 31; a plain
//     int could not represent CRM_READLINE and the flags above it.
typedef struct
{
  char *stmt_name;        //  text representation of the command
  int stmt_code;          //  internal dispatch code
  int namelen;            //  presumably the length of stmt_name - confirm
  int is_executable;      //  whether the statement is "executable"
  int minslashes;         //  minimum number of slash-groups
  int maxslashes;         //  maximum number of slash-groups
  int minparens;          //  minimum number of paren-groups
  int maxparens;          //  maximum number of paren-groups
  int minboxes;           //  minimum number of box-groups
  int maxboxes;           //  maximum number of box-groups
  unsigned long long flags_allowed_mask;  //  flags allowed for this statement
} STMT_TABLE_TYPE;


//     The compiler file actually contains this "for real"; the
//      extern here is merely a reference to it.
//
#ifndef BASE_COMPILER_TABLE_HERE
extern STMT_TABLE_TYPE stmt_table[];
#endif


//   Argument type codes.  They must remain synched, IN THIS ORDER, with
//   the start chars and end chars in crm_statement_parse.
//
#define CRM_ANGLES   0
#define CRM_PARENS   1
#define CRM_BOXES    2
#define CRM_SLASHES  3



//   The possible exit codes.
//   NOTE(review): if CRM_EXIT_APOCALYPSE is ever handed to exit(), POSIX
//   only reports the low 8 bits of the status to the parent, so 666 would
//   be seen as 154 - confirm how this value is used.
#define CRM_EXIT_OK           0
#define CRM_EXIT_ERROR        1
#define CRM_EXIT_FATAL        2
#define CRM_EXIT_APOCALYPSE 666


//   ORable exec codes for crm_zexpandvar: OR together the ones you want
//   zexpandvar to perform.  Nexpandvar is ansi|stringvar|redirect, and
//   qexpandvar is "all of them".  :)
//   Each code is one bit.
#define CRM_EVAL_ANSI       (1 << 0)   /* 0x01 */
#define CRM_EVAL_STRINGVAR  (1 << 1)   /* 0x02 */
#define CRM_EVAL_REDIRECT   (1 << 2)   /* 0x04 */
#define CRM_EVAL_STRINGLEN  (1 << 3)   /* 0x08 */
#define CRM_EVAL_MATH       (1 << 4)   /* 0x10 */



//    The possible mmap cache actions:
//      UNUSED  - (no further description given here)
//      ACTIVE  - makes it really mapped (or reactivates a released mmap)
//      RELEASE - marks the slot reusable, but doesn't unmap (yet)
//      DROP    - really unmaps
#define CRM_MMAP_CACHE_UNUSED   0
#define CRM_MMAP_CACHE_ACTIVE   1
#define CRM_MMAP_CACHE_RELEASE  2
#define CRM_MMAP_CACHE_DROP     3




//  syntax highlighted by Code2HTML, v. 0.9.1