//  crm_expr_classify.c  - Controllable Regex Mutilator,  version v1.0
//  Copyright 2001-2006  William S. Yerazunis, all rights reserved.
//  
//  This software is licensed to the public under the Free Software
//  Foundation's GNU GPL, version 2.  You may obtain a copy of the
//  GPL by visiting the Free Software Foundations web site at
//  www.fsf.org, and a copy is included in this distribution.  
//
//  Other licenses may be negotiated; contact the 
//  author for details.  
//
//  include some standard files
#include "crm114_sysincludes.h"

//  include any local crm114 configuration file
#include "crm114_config.h"

//  include the crm114 data structures file
#include "crm114_structs.h"

//  include the routine declarations file
#include "crm114.h"

//  OSBF declarations
#include "crm114_osbf.h"

//    The command line argument count and vector (argc, argv).
//    Not referenced by the two dispatch routines in this file.
extern int prog_argc;
extern char **prog_argv;

//    The auxiliary input buffer (for WINDOW input).
extern char *newinputbuf;

//    The globals used when we need a big buffer - allocated once, used
//    wherever needed.  These are sized to the same size as the data window.
extern char *inbuf;
extern char *outbuf;
extern char *tempbuf;

//     Dispatch a LEARN statement
//
//   Pulls the statement's b1 argument, lets crm_restrictvar turn it
//   into a buffer pointer plus start/length, and forwards those to the
//   learner picked by the classifier flag bits in apb->sflags.  When
//   no recognized classifier flag is present the Markov learner runs.
//
//   Returns the selected learner's return value, or - when
//   crm_restrictvar reports an error - the value produced by
//   nonfatalerror / fatalerror (0 if neither was called).
//
int crm_expr_learn (CSL_CELL *csl, ARGPARSE_BLOCK *apb)
{
  char boxbuf [MAX_PATTERN];
  char errmsg [MAX_PATTERN];
  char *text;
  long text_start;
  long text_len;
  long status;
  long ssfl_on_entry;
  long long algo = 0;
  int result;

  //  Fetch the restriction expression, then resolve it to the region
  //  of text that is to be learned.
  crm_get_pgm_arg (boxbuf, MAX_PATTERN, apb->b1start, apb->b1len);
  status = crm_restrictvar (boxbuf, apb->b1len,
                            NULL,
                            &text, &text_start, &text_len,
                            errmsg);

  if (status < 0)
    {
      long stmt_before = csl->cstmt;
      long fault = 0;

      switch (status)
        {
        case -1:
          fault = nonfatalerror (errmsg, "");
          break;
        case -2:
          fault = fatalerror (errmsg, "");
          break;
        default:
          break;
        }

      //  If the FAULT handler left the current statement alone, take
      //  the FAIL exit; if it moved it, just continue from there.
      if (csl->cstmt == stmt_before)
        {
          csl->cstmt = csl->mct[csl->cstmt]->fail_index - 1;
          csl->aliusstk [ csl->mct[csl->cstmt]->nest_level ] = -1;
        }
      return (fault);
    }

  //  Remember sparse_spectrum_file_length so it can be put back after
  //  the learner has run (the original author notes that many learners
  //  mangle it).
  ssfl_on_entry = sparse_spectrum_file_length;

  //  Only the "which algorithm" bits of the statement flags matter here.
  algo = apb->sflags;
  algo &= ( CRM_OSB_BAYES | CRM_CORRELATE | CRM_OSB_WINNOW | CRM_OSBF
            | CRM_HYPERSPACE | CRM_ENTROPY | CRM_SVM | CRM_SKS | CRM_FSCM );

  //  First matching flag wins; the order below is significant when
  //  more than one classifier flag is set.
  if (algo & CRM_OSB_BAYES)
    result = crm_expr_osb_bayes_learn (csl, apb, text, text_start, text_len);
  else if (algo & CRM_CORRELATE)
    result = crm_expr_correlate_learn (csl, apb, text, text_start, text_len);
  else if (algo & CRM_OSB_WINNOW)
    result = crm_expr_osb_winnow_learn (csl, apb, text, text_start, text_len);
  else if (algo & CRM_OSBF)
    result = crm_expr_osbf_bayes_learn (csl, apb, text, text_start, text_len);
  else if (algo & CRM_HYPERSPACE)
    result = crm_expr_osb_hyperspace_learn (csl, apb, text, text_start, text_len);
  else if (algo & CRM_ENTROPY)
    result = crm_expr_bit_entropy_learn (csl, apb, text, text_start, text_len);
  else if (algo & CRM_SVM)
    result = crm_expr_svm_learn (csl, apb, text, text_start, text_len);
  else if (algo & CRM_SKS)
    result = crm_expr_sks_learn (csl, apb, text, text_start, text_len);
  else if (algo & CRM_FSCM)
    result = crm_expr_fscm_learn (csl, apb, text, text_start, text_len);
  else
    result = crm_expr_markov_learn (csl, apb, text, text_start, text_len);

  sparse_spectrum_file_length = ssfl_on_entry;

  return (result);
}

//      Dispatch a CLASSIFY statement
//
//   Fetches the statement's b1 argument text, resolves it through
//   crm_restrictvar into a (buffer, start, length) triple, and passes
//   that to the classifier selected by the flag bits in apb->sflags.
//   With none of the recognized classifier flags set, the Markov
//   classifier is used.
//
//   Returns:  if crm_restrictvar fails, the value from nonfatalerror /
//   fatalerror (0 if neither was invoked); otherwise whatever the
//   selected classifier returned.
//
int crm_expr_classify (CSL_CELL *csl, ARGPARSE_BLOCK *apb)
{
  char box_text [MAX_PATTERN];
  char errstr [MAX_PATTERN];
  long i;
  char *txt;
  long start;
  long len;
  int retval;
  long long classifier_flags = 0;

  //            get start/length of the text we're going to classify:
  //
  crm_get_pgm_arg (box_text, MAX_PATTERN, apb->b1start, apb->b1len);

  //  Use crm_restrictvar to get start & length to look at.
  i = crm_restrictvar(box_text, apb->b1len, 
		      NULL,
		      &txt,
		      &start,
		      &len,
		      errstr);

  //  Error codes are treated as negative (-1 nonfatal, -2 fatal), the
  //  same way crm_expr_learn in this file handles the identical call.
  //  Testing for positive codes here would let a failed restriction
  //  fall through and classify with txt/start/len never filled in.
  //  NOTE(review): sign convention taken from crm_expr_learn; confirm
  //  against the definition of crm_restrictvar.
  if ( i < 0)
    {
      long curstmt;
      long fev;
      fev = 0;
      curstmt = csl->cstmt;
      if (i == -1)
	fev = nonfatalerror (errstr, "");
      if (i == -2)
	fev = fatalerror (errstr, "");
      //
      //     did the FAULT handler change the next statement to execute?
      //     If so, continue from there, otherwise, we FAIL.
      if (curstmt == csl->cstmt)
	{
	  csl->cstmt = csl->mct[csl->cstmt]->fail_index - 1;
	  csl->aliusstk [ csl->mct[csl->cstmt]->nest_level ] = -1;
	}
      return (fev);
    }
  
  //            get our flags... the only ones we're interested in here
  //            are the ones that specify _which_ algorithm to use.
  classifier_flags = apb->sflags;
  
  classifier_flags = classifier_flags &
    ( CRM_OSB_BAYES | CRM_CORRELATE | CRM_OSB_WINNOW | CRM_OSBF 
      | CRM_HYPERSPACE | CRM_ENTROPY | CRM_SVM | CRM_SKS | CRM_FSCM );

  //  First matching flag wins; no flag at all means Markov.
  if (classifier_flags & CRM_OSB_BAYES)
    {
      retval = crm_expr_osb_bayes_classify (csl, apb, txt, start, len);
    }
  else
  if (classifier_flags & CRM_CORRELATE)
    {
      retval = crm_expr_correlate_classify (csl, apb, txt, start, len);
    }
  else
  if (classifier_flags & CRM_OSB_WINNOW)
    {
      retval = crm_expr_osb_winnow_classify (csl, apb, txt, start, len);
    }
  else
  if (classifier_flags & CRM_OSBF )
    {
      retval = crm_expr_osbf_bayes_classify (csl, apb, txt, start, len);
    }
  else
  if (classifier_flags & CRM_HYPERSPACE)
    {
      retval = crm_expr_osb_hyperspace_classify (csl, apb, txt, start, len);
    }
  else
  if (classifier_flags & CRM_ENTROPY)
    {
      retval = crm_expr_bit_entropy_classify (csl, apb, txt, start, len);
    }
  else
  if (classifier_flags & CRM_SVM)
    {
      retval = crm_expr_svm_classify (csl, apb, txt, start, len);
    }
  else
  if (classifier_flags & CRM_SKS)
    {
      retval = crm_expr_sks_classify (csl, apb, txt, start, len);
    }
  else
  if (classifier_flags & CRM_FSCM)
    {
      retval = crm_expr_fscm_classify (csl, apb, txt, start, len);
    }
  else
    {
      retval = crm_expr_markov_classify (csl, apb, txt, start, len);
    }

  //  Hand the classifier's result back to the caller instead of
  //  discarding it, as crm_expr_learn does with its learner's result.
  return (retval);
}




//  syntax highlighted by Code2HTML, v. 0.9.1