/* File:      string_xsb.c  -- string manipulation stuff
** Author(s): kifer
** Contact:   xsb-contact@cs.sunysb.edu
** 
** Copyright (C) The Research Foundation of SUNY, 1999
** 
** XSB is free software; you can redistribute it and/or modify it under the
** terms of the GNU Library General Public License as published by the Free
** Software Foundation; either version 2 of the License, or (at your option)
** any later version.
** 
** XSB is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
** FOR A PARTICULAR PURPOSE.  See the GNU Library General Public License for
** more details.
** 
** You should have received a copy of the GNU Library General Public License
** along with XSB; if not, write to the Free Software Foundation,
** Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**
** $Id: string_xsb.c,v 1.15 2002/08/07 15:42:13 lfcastro Exp $
** 
*/

#include "xsb_config.h"
#include "xsb_debug.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "auxlry.h"
#include "cell_xsb.h"
#include "error_xsb.h"
#include "psc_xsb.h"
#include "hash_xsb.h"
#include "tries.h"
#include "choice.h"
#include "deref.h"
#include "memory_xsb.h"
#include "heap_xsb.h"
#include "register.h"
#include "flags_xsb.h"
#include "binding.h"
#include "builtin.h"
#include "cinterf.h"

#include "sp_unify_xsb_i.h"
#include "string_xsb.h"

extern char *p_charlist_to_c_string(prolog_term term, VarString *outstring, 
				    char *in_func, char *where);
extern void c_string_to_p_charlist(char *name, prolog_term list,
				   char *in_func, char *where);

static Cell term, term2, term3;

static XSB_StrDefine(input_buffer);
static XSB_StrDefine(subst_buf);
static XSB_StrDefine(output_buffer);


#include "ptoc_tag_xsb_i.h"


xsbBool str_cat(void)
{
  static char *str1, *str2, *tmpstr;

  term = ptoc_tag(1);
  term2 = ptoc_tag(2);
  if (isatom(term) && isatom(term2)) {
    str1 = string_val(term);
    str2 = string_val(term2);
    
    tmpstr = (char *)malloc(strlen(str1) + strlen(str2) + 1);
    strcpy(tmpstr, str1);
    strcat(tmpstr, str2);
    str1 = string_find(tmpstr, 1);
    free(tmpstr);
    return atom_unify(makestring(str1), ptoc_tag(3));
  } else return FALSE;
}


/*
  Input:
      Arg1: +Substr
      Arg2: + String
      Arg3: +forward/reverse (checks only f/r)
        f means the first match from the start of String
	r means the first match from the end of String
  Output:
      Arg4: Beg
        Beg is the offset where Substr matches. Must be a variable or an
	integer
      Arg5: End
	End is the offset of the next character after the end of Substr
	Must be a variable or an integer.

      Both Beg and End can be negative, in which case they represent the offset
      from the 2nd character past the end of String.
      For instance, -1 means the next character past the end of String,
      so End = -1 means that Substr must be a suffix of String..

      The meaning of End and of negative offsets is consistent with substring
      and string_substitute predicates.
*/
xsbBool str_match(void)
{
  static char *subptr, *stringptr, *direction, *matchptr;
  static int substr_beg, substr_end;
  int reverse=TRUE; /* search in reverse */
  int beg_bos_offset=TRUE; /* measure beg offset from the beg of string */
  int end_bos_offset=TRUE; /* measure end offset from the beg of string */
  int str_len, sub_len; /* length of string and substring */
  Cell beg_offset_term, end_offset_term;

  term = ptoc_tag(1);
  term2 = ptoc_tag(2);
  term3 = ptoc_tag(3);
  beg_offset_term = ptoc_tag(4);
  end_offset_term = ptoc_tag(5);
  if (!isatom(term) || !isatom(term2) || !isatom(term3)) {
    xsb_abort("STR_MATCH: Arguments 1,2,3 must be bound to strings");
  }
  subptr = string_val(term);
  stringptr = string_val(term2);
  direction = string_val(term3);

  if (*direction == 'f')
    reverse=FALSE;
  else if (*direction != 'r')
    xsb_abort("STR_MATCH: Argument 3 must be bound to forward/reverse");

  str_len=strlen(stringptr);
  sub_len=strlen(subptr);

  if (isinteger(beg_offset_term)|isboxedinteger(beg_offset_term)) {
    if (int_val(beg_offset_term) < 0) {
      beg_bos_offset = FALSE;
    }
  }
  if (isinteger(end_offset_term)|isboxedinteger(end_offset_term)) {
    if (int_val(end_offset_term) < 0) {
      end_bos_offset = FALSE;
    }
  }

  if (reverse)
    matchptr = xsb_strrstr(stringptr, subptr);
  else
    matchptr = strstr(stringptr, subptr);

  if (matchptr == NULL) return FALSE;

  substr_beg = (beg_bos_offset?
		matchptr - stringptr : -(str_len - (matchptr - stringptr))
		);
  substr_end = (end_bos_offset?
		(matchptr - stringptr) + sub_len
		: -(str_len + 1 - (matchptr - stringptr) - sub_len)
		);
  
  return
    (p2p_unify(beg_offset_term, makeint(substr_beg))
     && p2p_unify(end_offset_term, makeint(substr_end)));
}




/* XSB string substitution entry point
   In: 
      Arg1: string
      Arg2: beginning offset
      Arg3: ending offset. `_' or -1: end of string, -2: char before last, etc.
   Out:
      Arg4: new (output) string
   Always succeeds, unless error.
*/
xsbBool substring(void)
{
  /* Prolog args are first assigned to these, so we could examine the types
     of these objects to determine if we got strings or atoms. */
  prolog_term input_term, output_term;
  prolog_term beg_offset_term, end_offset_term;
  char *input_string=NULL;    /* string where matches are to be found */
  int beg_offset=0, end_offset=0, input_len=0, substring_len=0;
  int conversion_required=FALSE;

  XSB_StrSet(&output_buffer,"");

  input_term = reg_term(1);  /* Arg1: string to find matches in */
  if (isatom(input_term)) /* check it */
    input_string = string_val(input_term);
  else if (islist(input_term)) {
    input_string = p_charlist_to_c_string(input_term, &input_buffer,
					  "SUBSTRING", "input string");
    conversion_required = TRUE;
  } else
    xsb_abort("[SUBSTRING] Arg 1 (the input string) must be an atom or a character list");

  input_len = strlen(input_string);

  /* arg 2: beginning offset */
  beg_offset_term = reg_term(2);
  if (! (isinteger(beg_offset_term)|isboxedinteger(beg_offset_term)))
    xsb_abort("[SUBSTRING] Arg 2 (the beginning offset) must be an integer");
  beg_offset = int_val(beg_offset_term);
  if (beg_offset < 0)
    beg_offset = 0;
  else if (beg_offset > input_len)
    beg_offset = input_len;

  /* arg 3: ending offset */
  end_offset_term = reg_term(3);
  if (isref(end_offset_term))
    end_offset = input_len;
  else if (! (isinteger(end_offset_term)|isboxedinteger(end_offset_term)))
    xsb_abort("[SUBSTRING] Arg 3 (the end offset) must be integer or _");
  else end_offset = int_val(end_offset_term);

  if (end_offset < 0)
    end_offset = input_len + 1 + end_offset;
  else if (end_offset > input_len)
    end_offset = input_len;
  else if (end_offset < beg_offset)
    end_offset = beg_offset;

  output_term = reg_term(4);
  if (! isref(output_term))
    xsb_abort("[SUBSTRING] Arg 4 (the output string) must be an unbound variable");

  /* do the actual replacement */
  substring_len = end_offset-beg_offset;
  XSB_StrAppendBlk(&output_buffer, input_string+beg_offset, substring_len);
  XSB_StrNullTerminate(&output_buffer);
  
  /* get result out */
  if (conversion_required)
    c_string_to_p_charlist(output_buffer.string, output_term,
			   "SUBSTRING", "Arg 4");
  else
    c2p_string(output_buffer.string, output_term);
  
  return(TRUE);
}


/* XSB string substitution entry point: replace substrings specified in Arg2
   with strings in Arg3.
   In: 
       Arg1: string
       Arg2: substring specification, a list [s(B1,E1),s(B2,E2),...]
       Arg3: list of replacement strings
   Out:
       Arg4: new (output) string
   Always succeeds, unless error.
*/
xsbBool string_substitute(void)
{
  /* Prolog args are first assigned to these, so we could examine the types
     of these objects to determine if we got strings or atoms. */
  prolog_term input_term, output_term;
  prolog_term subst_reg_term, subst_spec_list_term, subst_spec_list_term1;
  prolog_term subst_str_term=(prolog_term)0,
    subst_str_list_term, subst_str_list_term1;
  char *input_string=NULL;    /* string where matches are to be found */
  char *subst_string=NULL;
  prolog_term beg_term, end_term;
  int beg_offset=0, end_offset=0, input_len;
  int last_pos = 0; /* last scanned pos in input string */
  /* the output buffer is made large enough to include the input string and the
     substitution string. */
  int conversion_required=FALSE; /* from C string to Prolog char list */

  XSB_StrSet(&output_buffer,"");

  input_term = reg_term(1);  /* Arg1: string to find matches in */
  if (isatom(input_term)) /* check it */
    input_string = string_val(input_term);
  else if (islist(input_term)) {
    input_string = p_charlist_to_c_string(input_term, &input_buffer,
					  "STRING_SUBSTITUTE", "input string");
    conversion_required = TRUE;
  } else
    xsb_abort("[STRING_SUBSTITUTE] Arg 1 (the input string) must be an atom or a character list");

  input_len = strlen(input_string);

  /* arg 2: substring specification */
  subst_spec_list_term = reg_term(2);
  if (!islist(subst_spec_list_term) && !isnil(subst_spec_list_term))
    xsb_abort("[STRING_SUBSTITUTE] Arg 2 must be a list [s(B1,E1),s(B2,E2),...]");

  /* handle substitution string */
  subst_str_list_term = reg_term(3);
  if (! islist(subst_str_list_term))
    xsb_abort("[STRING_SUBSTITUTE] Arg 3 must be a list of strings");

  output_term = reg_term(4);
  if (! isref(output_term))
    xsb_abort("[STRING_SUBSTITUTE] Arg 4 (the output) must be an unbound variable");

  subst_spec_list_term1 = subst_spec_list_term;
  subst_str_list_term1 = subst_str_list_term;

  if (isnil(subst_spec_list_term1)) {
    XSB_StrSet(&output_buffer, input_string);
    goto EXIT;
  }
  if (isnil(subst_str_list_term1))
    xsb_abort("[STRING_SUBSTITUTE] Arg 3 must not be an empty list");

  do {
    subst_reg_term = p2p_car(subst_spec_list_term1);
    subst_spec_list_term1 = p2p_cdr(subst_spec_list_term1);

    if (!isnil(subst_str_list_term1)) {
      subst_str_term = p2p_car(subst_str_list_term1);
      subst_str_list_term1 = p2p_cdr(subst_str_list_term1);

      if (isatom(subst_str_term)) {
	subst_string = string_val(subst_str_term);
      } else if (islist(subst_str_term)) {
	subst_string = p_charlist_to_c_string(subst_str_term, &subst_buf,
					      "STRING_SUBSTITUTE",
					      "substitution string");
      } else 
	xsb_abort("[STRING_SUBSTITUTE] Arg 3 must be a list of strings");
    }

    beg_term = p2p_arg(subst_reg_term,1);
    end_term = p2p_arg(subst_reg_term,2);

    if (!(isinteger(beg_term)|isboxedinteger(beg_term)) || 
	!(isinteger(end_term)|isboxedinteger(end_term)))
      xsb_abort("[STRING_SUBSTITUTE] Non-integer in Arg 2");
    else{
      beg_offset = int_val(beg_term);
      end_offset = int_val(end_term);
    }
    /* -1 means end of string */
    if (end_offset < 0)
      end_offset = input_len;
    if ((end_offset < beg_offset) || (beg_offset < last_pos))
      xsb_abort("[STRING_SUBSTITUTE] Substitution regions in Arg 2 not sorted");

    /* do the actual replacement */
    XSB_StrAppendBlk(&output_buffer,input_string+last_pos,beg_offset-last_pos);
    XSB_StrAppend(&output_buffer, subst_string);
    
    last_pos = end_offset;

  } while (!isnil(subst_spec_list_term1));

  XSB_StrAppend(&output_buffer, input_string+end_offset);

 EXIT:
  /* get result out */
  if (conversion_required)
    c_string_to_p_charlist(output_buffer.string, output_term,
			   "STRING_SUBSTITUTE", "Arg 4");
  else
    c2p_string(output_buffer.string, output_term);
  
  return(TRUE);
}


/*
 * strrstr.c -- find last occurence of string in another string
 *
 */

char *xsb_strrstr(char *str, char *pat)
{
  size_t len, patlen;
  const char *p;
  
  len = strlen(str);
  patlen = strlen(pat);
  
  if (patlen > len)
    return NULL;
  for (p = str + (len - patlen); p >= str; --p)
    if (*p == *pat && strncmp(p, pat, patlen) == 0)
      return (char *) p;
  return NULL;
}


syntax highlighted by Code2HTML, v. 0.9.1