/* File:      xsbpattern.c -- XSB-side interface to match() and substitute()
** Author(s): Jin Yu
** Contact:   xsb-contact@cs.sunysb.edu
** 
** Copyright (C) The Research Foundation of SUNY, 1998
** 
** XSB is free software; you can redistribute it and/or modify it under the
** terms of the GNU Library General Public License as published by the Free
** Software Foundation; either version 2 of the License, or (at your option)
** any later version.
** 
** XSB is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
** FOR A PARTICULAR PURPOSE.  See the GNU Library General Public License for
** more details.
** 
** You should have received a copy of the GNU Library General Public License
** along with XSB; if not, write to the Free Software Foundation,
** Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**
** $Id: xsbpattern.c,v 1.10 2003/04/15 17:16:24 kostis Exp $
** 
*/


/*----------------------------------------------------------------------------
       try_match__() -- find the match pattern,
       next_match__() -- find the next match pattern,
       do_bulk_match__() -- find the global match patterns,
       perl_substitute__() -- substitute the string with expected pattern,
       load_perl__() -- load the perl interpretor,
       unload_perl__() -- release the perl interpretor object,
       get_match_resultC__() -- get the perl pattern match C function,
       get_bulk_match_result__() --  get Perl global pattern match results.
       
----------------------------------------------------------------------------*/


#include "interface.h"
#include "perlpattern.c"          /* pattern match basic functions */   

void build_sub_match_spec( void );
int is_global_pattern( char *);
int global_pattern_mode = FALSE;

extern void xsb_abort(char *, ...);

#define xsb_warn(warning)	fprintf(stderr, "++Warning: %s\n", warning)


/*----------------------------------------------------------------------------
try_match__()
The pattern matching function which includes loading perl interpreter and 
trying the perl pattern matching.
arguments: 
  input: char* string,    -- input text
	 char* pattern    --  match pattern
  output:if no match found, return FAILURE (0).
----------------------------------------------------------------------------*/
int try_match__( void )
{
  SV *text;        /* the storage for the string in embedded Perl */
  SV *string_buff; /* the storage for the string in embedded Perl */
  int was_match;   /* number of the matches */
  char *string = ptoc_string(1),
    *pattern = ptoc_string(2);

  /* first load the perl interpreter, if unloaded */
  if (perlObjectStatus == UNLOADED) load_perl__();

  text = newSV(0);
  string_buff = newSV(0);
  sv_setpv(text, string);  /* store the string in the SV */
    
  was_match = match(text, pattern );
  
  global_pattern_mode = is_global_pattern(pattern);
  
  SvREFCNT_dec(string_buff);
  SvREFCNT_dec(text);
  
  return(was_match);
}


/*----------------------------------------------------------------------------
next_match__()
The pattern match function which repeats pattern match after 
the pattern match of the function try_match__().
If there is no calling of function try_match__() before, give warning! 
   output: if no match found, return FAILURE.
----------------------------------------------------------------------------*/
int next_match__( void )
{
  int was_match;        /* return code */

   if ( matchPattern == NULL ) { /*didn't try_match__ before*/
     xsb_warn("call try_match/2 first!");
     was_match = FAILURE;
   }
   else /*do next match*/
     was_match = match_again( );

   if (global_pattern_mode)
     return(was_match);
   /* always fail, if Perl pattern is not global */
   return FAILURE;
}

/*----------------------------------------------------------------------------
do_bulk_match__()
The pattern match function which includes loading perl interpreter and 
doing the global perl pattern match, and storing the results in the global 
array of bulkMatchList.
argument: 
  input: char* string	     	     -- input text
	 char* pattern	     	     --  match pattern
  output: int* num_match     	     --  the number of the matches	 
----------------------------------------------------------------------------*/
int do_bulk_match__( void )
{
  AV *match_list;           /* AV storage of matches list*/
  SV *text;                 /* storage for the embedded perl cmd */
  SV *string_buff;          /* storage for the embedded perl cmd */
  int num_match;            /* the number of the matches */
  int i;
 
  /* first load the perl interpreter, if unloaded */
  if (perlObjectStatus == UNLOADED) load_perl__();

  text = newSV(0);
  string_buff = newSV(0);
  sv_setpv(text, ptoc_string(1));  /*put the string into an SV */
 
  /*------------------------------------------------------------------------
    free the old match list space and allocate new space for current match list
    -----------------------------------------------------------------------*/
  for ( i=0; i<preBulkMatchNumber; i++ ) 
    free(bulkMatchList[i]);
  if (bulkMatchList != NULL ) free(bulkMatchList);
  bulkMatchList = NULL;   

  /*------------------------------------------------------------------------
    do bulk match
    ----------------------------------------------------------------------*/
  num_match = all_matches(text, ptoc_string(2),&match_list);
    
  /* allocate the space to store the matches */
  if ( num_match != 0 ) {
    preBulkMatchNumber = num_match; /* reset the pre bulk match number */
    bulkMatchList = (char **)malloc(num_match*sizeof(char *)); 
    if ( bulkMatchList == NULL ) 
      xsb_abort("Cannot alocate memory to store the results for bulk match");
  }

  /*get the matches from the AV */
  for ( i=0;i<num_match;i++ ) {
    string_buff = av_shift(match_list);
    bulkMatchList[i] = (char *)malloc( strlen(SvPV(string_buff,PL_na))+1 ); 
    strcpy((char *)bulkMatchList[i], SvPV(string_buff,PL_na) );   
  } 

  SvREFCNT_dec(string_buff); /* release space*/
  SvREFCNT_dec(text);
  
  ctop_int(3, num_match);           /*return the number of matches*/
  return SUCCESS;
}

/*----------------------------------------------------------------------------
perl_substitute__()
The pattern substitution function which includes loading perl interpreter 
and doing the pattern substitution, then returning the replaced string.
arguments: 
  input: char* string, input text
	 char* pattern, match pattern
  output:char* string, output text
----------------------------------------------------------------------------*/
int perl_substitute__( void )
{
  SV *text;    /* Perl representation for the string to be 
		  modified by substitution */ 
  char *subst_cmd = ptoc_string(2);
  int i;                
  
  /* first load the perl interpreter, if unloaded */
  if (perlObjectStatus == UNLOADED) load_perl__();
  
  text = newSV(0);
  sv_setpv(text, ptoc_string(1));  /* put the string to the SV */
     
  if( !substitute(&text, subst_cmd) )
    return(FAILURE);
  
  global_pattern_mode = is_global_pattern(subst_cmd);

  if (substituteString != NULL ) free(substituteString);

  substituteString = malloc(strlen(SvPV(text,PL_na))+1);
  strcpy(substituteString,SvPV(text,PL_na));
  
  SvREFCNT_dec(text);  /*release space*/
  
  ctop_string(3, string_find(substituteString,1));  /*return changed text*/
  return SUCCESS;
}

/*----------------------------------------------------------------------------
load_perl__():
The function to implement the interface of C and Perl, load the perl 
interpreter and initial the global variables. Then the Perl function
is ready to run.
----------------------------------------------------------------------------*/

int load_perl__( void ) 
{
  char *embedding[] = {"","-e","0"};  /* perl interpreter config params */
  int i;

  /* check if the perl interpreter is loaded already*/
  if ( perlObjectStatus == LOADED ) return SUCCESS;

  /*------------------------------------------------------------------------
    initial the global variables
    ----------------------------------------------------------------------*/
  for ( i=0; i<MAX_TOTAL_MATCH; i++ ) 
    matchResults[i] = NULL;  
  preBulkMatchNumber = 0;
  bulkMatchList = NULL;
  matchPattern = NULL;
  substituteString = NULL;
  build_sub_match_spec();    /*build the submatch arguments string constant*/  

  my_perl = perl_alloc();
  perl_construct( my_perl );
  perl_parse( my_perl, NULL, 3, embedding, (char **)NULL );
  perl_run(my_perl);

  perlObjectStatus = LOADED;

  return (SUCCESS); 
}

/*---------------------------------------------------------------------------
unload_perl__():
The function to release the Perl interpreter, and deallocat the memory
---------------------------------------------------------------------------*/

int unload_perl__( void )
{
  int i;

  PL_perl_destruct_level = 1;
  perl_destruct( my_perl );
  perl_free( my_perl );

  /*-------------------------------------------------------------------------
   free all the space allocated for perl match functions
  -------------------------------------------------------------------------*/
  for ( i=0; i<preBulkMatchNumber; i++ ) 
    free(bulkMatchList[i]);
  if (bulkMatchList != NULL ) free(bulkMatchList);
  if (matchPattern != NULL ) free(matchPattern);
  if (substituteString != NULL ) free(substituteString);
  free(subMatchSpec);

  perlObjectStatus = UNLOADED;

  return SUCCESS;
}  

/*----------------------------------------------------------------------------
get_bulk_match_result__(order, argumentValue):
The function to get the values of all the matches.
input: is the position of the argument;
output: is the string of match result.
----------------------------------------------------------------------------*/

int get_bulk_match_result__( void ) {

  if (perlObjectStatus == UNLOADED ) {
    load_perl__();
    return(FAILURE);
  }

  if ( bulkMatchList[ptoc_int(1)] == NULL )
    return FAILURE;        /*no match*/
  else{
    int match_seq_number= ptoc_int(1);
    int match_array_sz= ptoc_int(3);
    if (match_seq_number < match_array_sz) {
      /* c2p_string( bulkMatchList[match_seq_number], reg_term(2)); */
      ctop_string(2, (char *)string_find(bulkMatchList[match_seq_number],1));
      return SUCCESS;
    }
    else return FAILURE;
  }
}


/*----------------------------------------------------------------------------
get_match_resultC__(matchCode, matchResult):
Get the value of the submatch string $1, $2, ... from 
the global string array of matchResults

input: is the match code. Match codes correspond to Perl match variables as
follows: 
             -1 -- $&
             -2 -- $`
             -3 -- $'
             -4 -- $+ 
              1 -- $1
              2 -- $2
              3 -- $3
              ....
              9 -- $9
              .... MAX_SUB_MATCH

output: the string of match result.

The results of the matches (the values of Perl vars) are in consecutive
cells of the matchResults array as follows:
     cell#  Perl var
       0 -- $&
       1 -- $`
       2 -- $'
       3 -- $+
       4 -- $1
       5 -- $2
        .......

----------------------------------------------------------------------------*/

int get_match_resultC__( void ) {

  int order; 
 
  int submatch_number=ptoc_int(1);
  
  /*--------------------------------------------------------------------------
    Convert from Prolog-side convention for refering to submatches to
    the corresponding  array index numbers in match result storage.
  --------------------------------------------------------------------------*/
  switch (submatch_number) {
  case MATCH:     /*MATCH = -1*/
    order = 0;    /* actual position in the memory */
    break;
  case PREMATCH:  /*PREMATCH = -2*/
    order = 1;
    break; 
  case POSTMATCH:  /*POSTMATCH = -3*/
    order = 2;
    break;
  case LAST_PAREN_MATCH:  /*LAST_PAREN_MATCH = -4*/
    order = 3;
    break;
  default:
    if ( submatch_number > MAX_SUB_MATCH ) {
      char message[120];
      sprintf(message,
	      "Specified submatch number %d exceeds the limit: %d\n",
	      submatch_number, MAX_SUB_MATCH);
      xsb_warn(message);
      order = -99;
    }
    else order = submatch_number+3;  /* actual position in the memory */
    break;
  }

  if (order == -99) return(FAILURE);

  if ( matchPattern == NULL ) { /*didn't try_match before*/
     xsb_warn("Call try_match/2 first!");
     return(FAILURE);
   } else if ( !strcmp(matchResults[order],"") || matchResults[order] == NULL )
     return(FAILURE);           /*no match found, return FAILURE */
  else {
    c2p_string( matchResults[order], reg_term(2));
    return(SUCCESS);
  }
}

/*----------------------------------------------------------------------------
void build_sub_match_string( void )
This function is used to build the submatch arguments list string, 
"($&,$`,$',$+,$1,$2,$3.....,$MAX_SUB_MATCH)"
here the value of MAX_SUB_MATCH is defined in the include file
----------------------------------------------------------------------------*/

void build_sub_match_spec( void ) {

  int i,j,k;          /*counter flags*/
  int spaceSize;      /*memory space size for the submatch string*/

  /*get the size of the submatch string, the size of string $1, $2 etc., is 2,
    when the digit is bigger than 10, the size of string $10, $11 etc., is 3,
    so whenever the digit increases by 10, the size of the string $digit will
    increase by 1. Following code is to calculate the size of string $1,$2...*/
  j = 1;
  k = 10;
  spaceSize=0;
  for (i=1;i<=MAX_SUB_MATCH;i++) {
    if ( i%k==0 ) {
      j++;
      k*=10;
    }
    spaceSize += 2+j;    /* the size of ",$" is 2 */
  }
  spaceSize+=(sizeof(FIXEDSUBMATCHSPEC)+1);


  /*build the submatch string*/
  subMatchSpec=(char *)malloc(spaceSize);
  strcpy(subMatchSpec, FIXEDSUBMATCHSPEC); /*build the fixed part $&,$`,$',$+*/

  /* add string $1, $2 etc., to the end of the string */
  for (i=1; i<=MAX_SUB_MATCH;i++)
    sprintf(&(subMatchSpec[strlen(subMatchSpec)]), ",$%d\0", i);
            /*add one of $1, $2 etc., to the string each time by order*/
  strcat(subMatchSpec, ")");

  return;
  
}


/* Check if the Perl pattern is global, i.e., contains the `g' modifier.
** This is needed so that next_match will know that it has to fail immediately,
** if no `g' has been specified.
*/
int is_global_pattern(char *pattern) {
  int len = strlen(pattern), i = len-1;

  /* skip other Perl pattern modifiers and spaces */
  while ( (i > 0) &&
	  ( *(pattern+i) == ' ' || *(pattern+i) == '\t'
	    || *(pattern+i) == 'o' || *(pattern+i) == 'i' ))
    i--;

  if (*(pattern+i) == 'g')
    return TRUE;
  return FALSE;

}


syntax highlighted by Code2HTML, v. 0.9.1