/******************************************************************************
 * $Id: mlex.c,v 1.7 2005/10/24 22:43:58 gareuselesinge Exp $
 * This file is part of liberopops (http://liberopops.sf.net)                 *
 * This file is distributed under the terms of GNU GPL license.               *
 ******************************************************************************/

/******************************************************************************
 * File description:
 *	Markup Language EXpressions
 * Notes:
 *	This version supports optional tags/strings and the backtrack
 *	tends to be a bit slower.
 * Authors:
 * 	Enrico Tassi <gareuselesinge@users.sourceforge.net>
 ******************************************************************************/

#include <string.h>
#include <time.h>

#include "regularexp.h"
#include "list.h"
#include "mlex.h"

#include "log.h"
#define LOG_ZONE "MLEX"

//#define DEBUG_MLEX 1

#define HIDDEN static

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  */
/*** local types/macro  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  */
/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  */

#define TOK_STR		0
#define TOK_TAG		1
#define TOK_OPT_TAG	2
#define TOK_OPT_STR	4

#define MATCHES 	1
#define NOT_MATCHES 	0

//! used to represent answers
struct answer_t
	{
	list_t* start;
	int len;
	list_t* dust_lengths;
	};

//! used for tokenization
struct token_t
	{
	int start,stop;
	short tag;
	int dustlen;
	};

//! used for backtracking
struct back_t
	{
	list_t* dust_lengths;
	list_t* position_stream;
	list_t* position_expr;
	int len;
	};

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  */
/*** prototypes divided by section  *  *  *  *  *  *  *  *  *  *  *  *  *  *  */
/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  */

/*** tokenization ***/
HIDDEN void regexec_my(char* s,regmatch_t *p, int *bag);
HIDDEN void regexec_myext(char* s,regmatch_t *p);
HIDDEN list_t *tokenize_html(char* t);
HIDDEN list_t *tokenize_exp(char* t);

/*** matching ***/
HIDDEN regmatch_t token_match(struct token_t* t,char* s,char* exp);
HIDDEN int token_match_token(struct token_t* t,char* s,struct token_t *t1,
	char* s1);
HIDDEN unsigned int mlmatch_find(list_t *data,list_t* pattern,char* str,
	char* exp,list_t*saved_pattern,list_t*stack,int len);
HIDDEN int is_a_keep(void* x,char* str);

/*** result refinement ***/
HIDDEN list_t* epurate(list_t* data,int len,list_t* dl,char* txt,list_t* get,
	char* str);
HIDDEN list_t* mlmatch_epurate(list_t* ans,char* txt, list_t* get,char* str);

/*** result handling ***/
HIDDEN struct chunk_t* mlmatch_get_x(list_t* l,int pos);
HIDDEN list_t* mlmatch_get_y(list_t* l,int pos);
HIDDEN char *mlmatch_get(struct chunk_t*c,char* str);

/*** aux ***/
HIDDEN void mlmatch_print_results_aux(list_t*res,char* str);
HIDDEN list_t* mlmatch_aux(list_t *data,list_t* pattern,char* str,char* exp,
	int min_len);		
/*** helpers ***/
HIDDEN void restore_dusts(list_t* orig, list_t* copy);
HIDDEN list_t * copy_dusts(list_t *orig);
HIDDEN void clean_stack(list_t* stack);
HIDDEN void reset_dustlen(list_t* pattern);
HIDDEN int exp_min_len(list_t* l);

/*** free ***/
HIDDEN void free_sublist(void* l);
HIDDEN void free_answer(void *f);
	
/*** new ***/
HIDDEN void * new_int(int i);
HIDDEN list_t* new_dust_lengths(list_t* exp);
HIDDEN struct chunk_t * new_chunk(int start,int stop);
HIDDEN struct answer_t* new_answer(list_t* start,int len,list_t* exp);
HIDDEN struct token_t *new_token(int start,int stop,short tags);

/*** print ***/
HIDDEN void print_token(struct token_t *c, char *s);
HIDDEN void print_toklist(list_t*l, char *s, int i);
HIDDEN void print_toklistn(list_t*l, char *s,int n);
HIDDEN void print_chunk(struct chunk_t*c, char* str);
#ifdef DEBUG_MLEX
HIDDEN void print_int(void*x);
#endif
HIDDEN void print_anslist(list_t *a,char* str);

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  */
/*** the code  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  */
/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  */

//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// section: tokenization ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * to be used wit an HTML string, finds the fist token
 * a token is a tag, or a comment or a <script></script>
 *
 */ 
#define MODE_SCRIPT 1
#define MODE_PLAIN 0
HIDDEN __inline__ int scriptmatch(char* s){
int pos = 0;

while (s[pos] == ' ') pos++;
if (s[pos] == '/') pos++;

if (s[pos] == '\0' || (s[pos] != 's' && s[pos] != 'S')) return MODE_PLAIN;
pos++;
if (s[pos] == '\0' || (s[pos] != 'c' && s[pos] != 'C')) return MODE_PLAIN;
pos++;
if (s[pos] == '\0' || (s[pos] != 'r' && s[pos] != 'R')) return MODE_PLAIN;
pos++;
if (s[pos] == '\0' || (s[pos] != 'i' && s[pos] != 'I')) return MODE_PLAIN;
pos++;
if (s[pos] == '\0' || (s[pos] != 'p' && s[pos] != 'P')) return MODE_PLAIN;
pos++;
if (s[pos] == '\0' || (s[pos] != 't' && s[pos] != 'T')) return MODE_PLAIN;

return MODE_SCRIPT;
}

HIDDEN void regexec_my(char* s,regmatch_t *p, int* mode)
{
int pos;
int dust;
int sm = MODE_PLAIN;

p->begin = -1;
p->end = -1;
	
for(pos = 0 ; !(
	s[pos] == '\0' || 
	(s[pos] == '<' && (((sm=scriptmatch(&s[pos+1])) == MODE_SCRIPT) || 
	 *mode == MODE_PLAIN ))) ; pos++);
	
if(s[pos] == '\0')
	return;

#ifdef DEBUG_MLEX
printf("STOP: s[pos] == %c, *mode = %d, next are %c%c%c%c%c%c sm = %d\n",
	s[pos],*mode,s[pos+1],s[pos+2],s[pos+3],s[pos+4],s[pos+5],s[pos+6],sm);
#endif

p->begin  = pos;

dust=0;
if(*mode != MODE_SCRIPT && !strncmp(&s[pos],"<!--",4))
	dust=1;

pos++;

while(1)
	{
	if(s[pos] == '\0')
		break;
	else if (s[pos] == '>')
		{
		if(dust == 1)
			{
			if(s[pos-1] == '-' && s[pos-2] == '-')
				break;
			}
		else 
			{
			if (*mode == MODE_SCRIPT)
				{
				if (scriptmatch(&s[pos-6]))
					break;
				} 
			else
				break;
			}
		}
	pos++;
	}

if(s[pos] == '\0')
	{
	p->begin = -1;
	return;
	}

if ( *mode == MODE_SCRIPT)
	*mode = MODE_PLAIN;
else if ( *mode == MODE_PLAIN)
	*mode = sm;

p->end = pos+1;
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * to be used wit an HTML string, finds the fist token
 * a token is a <tag>, a {tag} or a [string] 
 */
HIDDEN void regexec_myext(char* s,regmatch_t *p)
{
int pos;
p->begin = -1;
p->end = -1;
	
for(pos = 0 ; s[pos] != '<' && s[pos] != '{' && s[pos] != '\0' ; pos++);

if(s[pos] == '\0')
	return;
	
p->begin  = pos;

for(; s[pos] != '>' && s[pos] != '}' && s[pos] != '\0' ; pos++);

if(s[pos] == '\0')
	{
	p->begin = -1;
	return;
	}

p->end = pos+1;

}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * Splits an html string in tokens
 * 
 */
HIDDEN list_t *tokenize_html(char* t)
{
list_t * l = NULL,*tl=NULL;
int position=0;
regmatch_t p[1];
unsigned int len = strlen(t);

int bag = MODE_PLAIN;

do	{
	p[0].begin = -1;
	p[0].end = -1;
	regexec_my(&t[position],p,&bag); // faster, but not so much
	if(p[0].end != -1) 
		{
		int start1,start2,stop1,stop2;
	
		start2 = position+p[0].begin;
		stop2= position+p[0].end;

		start1=position;
		stop1=position+p[0].begin;
	
		l=list_add_fast(l,&tl,new_token(start1,stop1,0));
		if(start2+1 < stop2-1)
			l=list_add_fast(l,&tl,
				new_token(start2+1,stop2-1,TOK_TAG));
		}
	position+=p[0].end;
	} 
while(p[0].begin != -1 && position < len);

return l;
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * Splits an expression string in tokens
 * 
 */
HIDDEN list_t *tokenize_exp(char* t)
{
list_t * l = NULL,*tl=NULL;
int position=0;
regmatch_t p[1];

do	{
	p[0].begin = -1;
	p[0].end = -1;
	regexec_myext(&t[position],p); // faster, but not so much
	if(p[0].end != -1) 
		{
		int start1,start2,stop1,stop2;
	
		start2 = position+p[0].begin;
		stop2= position+p[0].end;

		start1=position;
		stop1=position+p[0].begin;
	
		if (t[start1] == '[')
			l=list_add_fast(l,&tl,
				new_token(start1+1,stop1-1,TOK_OPT_STR));	
		else
			l=list_add_fast(l,&tl,new_token(start1,stop1,0));
			
		if(start2+1 < stop2-1)
			{
			int tag = TOK_TAG;

			if (t[start2] == '{')
				tag |= TOK_OPT_TAG;
			
			l=list_add_fast(l,&tl,new_token(start2+1,stop2-1,tag));
			}
		}
	position+=p[0].end;
	} 
while(p[0].begin != -1);

return l;
}

//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// section: matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * is x a keep 'X' or a drop 'O' ?
 *
 */
HIDDEN int is_a_keep(void* x,char* str)
{
struct token_t* t = (struct token_t*)x;

#ifdef DEBUG_MLEX
printf("ACTING ");
print_token(x,str);
printf("\n");
#endif

if( t->start == t->stop)
	return 0;
else if( str[t->start] == 'X')
	return 1;
else if( str[t->start] == 'O')
	return 0;
else if(str[t->start] == '\0')
	return 0;
else 
	{
	DBG("we got a '%c'\n",str[t->start]);
	DBG("string was '%s'\n",&str[t->start]);
	ERROR_PRINT("Internal error : not X nor O (O of Ohio, not 0 of zero)");
	return 0;
	}
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * regularexp match on a token t on string s of 
 * exprssion exp
 *
 */
HIDDEN regmatch_t token_match(struct token_t* t,char* s,char* exp)
{
char tmp;
regmatch_t p;

tmp=s[t->stop];
s[t->stop]='\0';
p=regfind(&s[t->start],exp);
s[t->stop]=tmp;
return p;
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * regularexp match on a token t on string s
 * of expression identifyed by t1 on s1
 *
 */

HIDDEN int token_match_token(struct token_t* t,char* s,
		struct token_t *t1,char* s1)
{
char tmp;
regmatch_t p1;

if((t->tag & TOK_TAG) != (t1->tag & TOK_TAG))
	return NOT_MATCHES;

tmp=s1[t1->stop];
s1[t1->stop]='\0';
p1=token_match(t,s,&s1[t1->start]);
s1[t1->stop]=tmp;

#ifdef DEBUG_MLEX
printf("# %d : ",(t1->tag & (TOK_OPT_TAG|TOK_OPT_STR)));
printf("Comparing ");
print_token(t,s);
printf(" with ");
print_token(t1,s1);
printf(" -- %s\n",p1.begin != -1 ? "OK!" : "FAILED");
#endif

if(p1.begin != -1)
	return MATCHES;
else
	return NOT_MATCHES;
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * finds a sublist of data that matches pattern
 * 
 */ 
HIDDEN unsigned int mlmatch_find(list_t *data,list_t* pattern,
		char* str,char* exp,list_t* pattern_save,list_t* stack,int len)
{
if(pattern == NULL )
	{
	clean_stack(stack);
	
	return 0;
	}
else if (pattern != NULL && data == NULL)
	{
	clean_stack(stack);
	
	return 0;
	}
else 
	{
	struct token_t* t1= (struct token_t*)data->data;
	struct token_t* t2= (struct token_t*)pattern->data;
	int rc;


	if(t2->tag & TOK_OPT_TAG || t2->tag & TOK_OPT_STR)
		{
		struct back_t * back;

		back = malloc(sizeof(struct back_t));
		MALLOC_CHECK(back);
	
		back->dust_lengths = copy_dusts(pattern_save);
		back->position_stream = data;
		back->position_expr = pattern->next;
		back->len = len;

		stack = list_push(stack,back);
		}
			
	rc = token_match_token(t1,str,t2,exp);

	if(rc == MATCHES)
		{
		if(t2->tag & TOK_OPT_TAG || t2->tag & TOK_OPT_STR)
			t2->dustlen = 1;//mark used optional
		
		return 1+mlmatch_find(data->next,pattern->next,str,exp,
			pattern_save,stack,len+1);
		}
	else
		{
		// we have no chaces if it isn't an optional tag
		// we may backtrack
		struct back_t * back;

		back = list_head(stack);
		stack = list_pop(stack);

		if(back != NULL)
			{
			list_t* position_stream = back->position_stream;
			list_t* position_expr = back->position_expr;
			int old_len = back -> len;
			
			restore_dusts(pattern_save,back->dust_lengths);
			
			//free 
			list_free(back->dust_lengths,free);
			free(back);
			
			return -(len - old_len)+
				mlmatch_find(position_stream,
					position_expr,str,exp,
					pattern_save,stack,old_len);
			}
		
		return 0;
		}
	
	}
}

//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// section: result refinement ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * returns oly the 'X' of data (new list)
 *
 */
HIDDEN list_t* epurate(list_t* data,int len,list_t* dl,char* txt,
	list_t* get,char* str)
{
int check;
	
if( (data == NULL && get == NULL) || len == 0)
	return NULL;

check = dl != NULL && 
	get != NULL && 
	get->data != NULL &&
	dl->data != NULL &&
	(((struct token_t*)get->data)->tag & (TOK_OPT_TAG|TOK_OPT_STR));

if ( check && *((int*)dl->data) == 0)
	{
	//skip optional tag not matched
	return epurate(data,len,dl->next,txt,get->next,str);
	}
else if( data == NULL || get == NULL)
	{
	ERROR_ABORT("internal error: len(ans) != len(get)");
	}
else 
	{
	if(is_a_keep(get->data,str))
		{
		list_t* tmp;
		int start=((struct token_t*)(data->data))->start;
		int stop=((struct token_t*)(data->data))->stop;
		
		tmp = malloc(sizeof(list_t));
		MALLOC_CHECK(tmp);

		tmp->data=new_chunk(start,stop);
		if ( check )
			tmp->next=epurate(data->next,len-1,
				dl->next,txt,get->next,str);
		else
			tmp->next=epurate(data->next,len-1,
				dl,txt,get->next,str);
		
		return tmp;
		}
	else
		{
#ifdef DEBUG_MLEX
		printf("(%d)droppping ",len);
		print_token(((struct token_t*)(data->data)),txt);
		printf("\n");
#endif		
		if ( check)
			return epurate(data->next,len-1,
				dl->next,txt,get->next,str);
		else
			return epurate(data->next,len-1,dl,txt,get->next,str);
		}
	}
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * selects X in the answer
 * 
 */
HIDDEN list_t* mlmatch_epurate(list_t* ans,char* txt, list_t* get,char* str)
{
if (ans == NULL)	
	return NULL;
else
	{
	list_t* tmp;
	
	tmp = malloc(sizeof(list_t));
	MALLOC_CHECK(tmp);

#ifdef DEBUG_MLEX
	printf("epuration: ");	
	list_visit(((struct answer_t *)ans->data)->dust_lengths,print_int);
#endif
	
	tmp->data=epurate(((struct answer_t *)ans->data)->start,
			((struct answer_t *)ans->data)->len,
			((struct answer_t *)ans->data)->dust_lengths,
			txt,get,str);
	
	tmp->next=mlmatch_epurate(ans->next,txt,get,str);
	return tmp;
	}
}

//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// section: result handling ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * returns an element
 *
 */
HIDDEN struct chunk_t* mlmatch_get_x(list_t* l,int pos)
{
if(l != NULL)
	{
	if(pos == 0)
		return ((struct chunk_t*)l->data);
	else
		return mlmatch_get_x(l->next,pos-1);
	}
else
	{
	//DBG("you are asking for %d\n",pos);
	//ERROR_PRINT("Internal: wrong position\n");
	return NULL;
	}

	
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * returns a row
 *
 */
HIDDEN list_t* mlmatch_get_y(list_t* l,int pos)
{
if(l != NULL)
	{
	if(pos == 0)
		return ((list_t*)l->data);
	else
		return mlmatch_get_y(l->next,pos-1);
	}
else
	{
	//DBG("you are asking for %d\n",pos);
	//ERROR_PRINT("Internal: wrong position\n");
	return NULL;
	}
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * strdups str[c]
 * 
 */
HIDDEN char *mlmatch_get(struct chunk_t*c,char* str)
{
char tmp;
char *rc;
tmp = str[c->stop];
str[c->stop]='\0';
rc = strdup(&str[c->start]);
MALLOC_CHECK(rc);
str[c->stop]=tmp;
return rc;
}


//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// section: aux ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * helper for recursion
 *
 */
HIDDEN void mlmatch_print_results_aux(list_t*res,char* str)
{
if(res != NULL)
	{
	print_chunk((struct chunk_t*)(res->data),str);
	if(res->next != NULL)
		printf(",");
	mlmatch_print_results_aux(res->next,str);
	}
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * helper for recursion
 *
 */ 
HIDDEN list_t* mlmatch_aux(list_t *data,list_t* pattern,char* str,char* exp, 
	int min_len)
{
if (data==NULL)
	return NULL;
else
	{
	int rc;
	rc = mlmatch_find(data,pattern,str,exp,pattern,NULL,0);

	//FIX may be faster if not recalculated each time
	if(rc >= min_len) 
		{
		list_t* tmp;
		tmp = malloc(sizeof(list_t));
		MALLOC_CHECK(tmp);
		
		tmp->data=new_answer(data,rc,pattern);
		MALLOC_CHECK(tmp->data);
		tmp->next=(mlmatch_aux(data->next,pattern,str,exp,min_len));

		reset_dustlen(pattern);
		return tmp;
		}
	else
		{
		reset_dustlen(pattern);
		return mlmatch_aux(data->next,pattern,str,exp,min_len);
		}
	}
}

//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// section: helpers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * to restore old status in backtracking
 * 
 */ 
HIDDEN void restore_dusts(list_t* orig, list_t* copy)
{
if(orig != NULL && copy != NULL)
	{
	if (((struct token_t*)orig->data)->tag & (TOK_OPT_TAG|TOK_OPT_STR))
		{
		((struct token_t *)orig->data)->dustlen = 
			*((int *)copy->data);
		restore_dusts(orig->next,copy->next);
		}
	else
		restore_dusts(orig->next,copy);
	}
	
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * save status for backtrack
 * 
 */ 
HIDDEN list_t * copy_dusts(list_t *orig)
{
if (orig==NULL) 
	return NULL;
else if (((struct token_t*)orig->data)->tag & (TOK_OPT_TAG|TOK_OPT_STR))
	{
	list_t* tmp;
	tmp = malloc(sizeof(list_t));
	tmp->data = new_int(((struct token_t *)orig->data)->dustlen);
	tmp->next = copy_dusts(orig->next);
	return tmp;
	}
else
	return copy_dusts(orig->next);
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * clear the stack
 * 
 */ 
HIDDEN void clean_stack(list_t* stack)
{
if(stack != NULL)
	{
	struct back_t* back = list_head(stack);
	list_free(back->dust_lengths,free);
	free(back);
	clean_stack(list_pop(stack));
	}
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * sets all to 0
 *
 */ 
HIDDEN void reset_dustlen(list_t* pattern)
{
if(pattern != NULL)
	{
	((struct token_t*)pattern->data)->dustlen = 0;
	reset_dustlen(pattern->next);
	}
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * counts non optional tags in a expr
 *
 */ 
HIDDEN int exp_min_len(list_t* l)
{
if(l==NULL)
	return 0;
else
	{
	if(((struct token_t*)l->data)->tag & (TOK_OPT_TAG|TOK_OPT_STR))
		return exp_min_len(l->next);
	else
		return 1+exp_min_len(l->next);
	}
}

//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// section: free ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * free answer_t
 *
 */
HIDDEN void free_answer(void *f)
{
list_free(((struct answer_t *)f)->dust_lengths,free);
free(f);
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * helper
 *
 */
HIDDEN void free_sublist(void* l)
{
list_free((list_t*)l,free);
}

//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// section: new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * creates a new chunk_t
 *
 */
HIDDEN struct chunk_t * new_chunk(int start, 
		int stop)
{
struct chunk_t * tmp;

tmp = malloc(sizeof(struct chunk_t));
MALLOC_CHECK(tmp);

tmp->start=start;
tmp->stop=stop;

return tmp;
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * creates a new token
 *
 */ 
HIDDEN struct token_t *new_token(int start,int stop,short tags)
{
struct token_t *tmp;

tmp = (struct token_t *)malloc(sizeof(struct token_t));
MALLOC_CHECK(tmp);

tmp->start = start;
tmp->stop = stop;
tmp->tag = tags;
tmp->dustlen=0;

return tmp;
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * builds a new int 
 *
 */ 
HIDDEN void * new_int(int i)
{
int * x;
x = malloc(sizeof(int));
MALLOC_CHECK(x);
*x = i;
return x;
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * builds a new dustlen list 
 *
 */ 
HIDDEN list_t* new_dust_lengths(list_t* exp)
{
if(exp == NULL)
	return NULL;
else
	{
	if( ((struct token_t*)exp->data)->tag & (TOK_OPT_TAG|TOK_OPT_STR))
		{
		list_t* tmp = malloc(sizeof(list_t));
		MALLOC_CHECK(tmp);
		
		tmp->data = new_int(((struct token_t*)exp->data)->dustlen);
		MALLOC_CHECK(tmp->data);
		
		((struct token_t*)exp->data)->dustlen=0;
		tmp->next=new_dust_lengths(exp->next);
		
		return tmp;
		}
	else
		return new_dust_lengths(exp->next);
	}
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * builds a new answer 
 *
 */
HIDDEN struct answer_t* new_answer(list_t* start,int len,list_t* exp)
{
struct answer_t*tmp;

tmp = malloc(sizeof(struct answer_t));
MALLOC_CHECK(tmp);

tmp->start=start;
tmp->len=len;
tmp->dust_lengths = new_dust_lengths(exp);

#ifdef DEBUG_MLEX
printf("-- a new answer of len %d\n",len);
list_visit(tmp->dust_lengths,print_int);
printf("\n");
#endif

return tmp;
}

//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// section: print ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * prints an int 
 *
 */
#ifdef DEBUG_MLEX
HIDDEN void print_int(void*x)
{
printf("%d,",*(int*)x);
}
#endif 

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * prints a token c on s
 *
 */
HIDDEN void print_token(struct token_t *c, char *s)
{
char tmp;
tmp = s[c->stop];
s[c->stop]='\0';
if(c->tag & TOK_OPT_TAG)
	{
	if(c->dustlen != 0)
		printf("{%s}*",&s[c->start]);
	else
		printf("{%s}",&s[c->start]);
	}
else if(c->tag & TOK_OPT_STR)
	{
	if(c->dustlen != 0)
		printf("[%s]*%d",&s[c->start],c->dustlen);
	else
		printf("[%s]",&s[c->start]);
	}
else if(c->tag & TOK_TAG)	
	printf("<%s>",&s[c->start]);
else
	printf("'%s'",&s[c->start]);

s[c->stop]=tmp;
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * prints the whole list
 *
 */
HIDDEN void print_toklist(list_t*l, char *s, int i)
{
if(l != NULL)
	{
	print_token((struct token_t *)l->data,s);
	if(l->next != NULL)
		printf(" (%d),",i);
	print_toklist(l->next,s,i+1);
	}
else
	printf("\n\n");
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * prints first n elements only
 *
 */
HIDDEN void print_toklistn(list_t*l, char *s,int n)
{
if(l != NULL && n != 0)
	{
	print_token((struct token_t *)l->data,s);
	if(l->next != NULL && n!= 1)
		printf(",");
	print_toklistn(l->next,s,n-1);
	}
else
	printf("\n\n");
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * the answer list
 * 
 */ 
HIDDEN void print_anslist(list_t *a,char* str)
{
if (a != NULL)
	{
	print_toklistn(((struct answer_t*)a->data)->start,str,
		((struct answer_t*)a->data)->len);
	print_anslist(a->next,str);
	}
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * prints str[c]
 *
 */
HIDDEN void print_chunk(struct chunk_t*c, char* str)
{
char tmp;
tmp = str[c->stop];
str[c->stop]='\0';
printf("'%s'",&str[c->start]);
str[c->stop]=tmp;
}

//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// section: exported ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * return a strdup of res[x,y]
 */
char* mlmatch_get_result(int x,
		int y,list_t* res,char* s)
{
struct chunk_t* tmp = mlmatch_get_x(mlmatch_get_y(res,y),x);

if(tmp == NULL)
	{
	//DBG("you are asking for %d %d here :\n",x,y);
	//mlmatch_print_results(res,s);
	return NULL;
	}
	
return mlmatch_get(tmp,s);
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * see the documentation in the .h
 *
 */ 
list_t* mlmatch(char* str, char* exp, char* get)
{
list_t * l = NULL;
list_t * r = NULL;
list_t * a = NULL;
list_t * g = NULL;
list_t * x = NULL;
int len_g,len_r,min_len_g,min_len_r;

l=tokenize_html(str);
r=tokenize_exp(exp);
g=tokenize_exp(get);

#ifdef DEBUG_MLEX
printf("print STRING\n");
print_toklist(l,str,0);
printf("print EXPRESSION\n");
print_toklist(r,exp,0);
printf("print GET\n");
print_toklist(g,get,0);
printf("print FILE\n");
printf("%s",str);
printf("\n");
#endif

len_g = list_len(g);
len_r = list_len(r);
min_len_g = exp_min_len(g);
min_len_r = exp_min_len(r);

//check for wrong args
if(len_g != len_r)
	{
	DBG("Internal: len(exp)=%d != len(get)=%d",
		len_r,len_g);
	ERROR_PRINT("list_len(g) != list_len(r)\n");
	goto abort;
	}
if(min_len_g != min_len_r)
	{
	DBG("Internal: min_len(exp)=%d != min_len(get)=%d",
		min_len_r,min_len_g);
	ERROR_PRINT("exp_min_len(g) != exp_min_len(r)\n");
	goto abort;
	}

//check for ambiguous matches
if ( len_g > 2 * min_len_g )
	{
	DBG("Internal: ambiguous match, too many optional tags\n");
	DBG("Internal: concrete tags are %d, while optionals are %d\n",
		min_len_g,len_g);
	}

a = mlmatch_aux(l,r,str,exp,min_len_r);

#ifdef DEBUG_MLEX
printf("print ANSWERS\n");
print_anslist(a,str);
printf("\n");
#endif

x = mlmatch_epurate(a,str,g,get);

list_free(a,free_answer);
abort:
list_free(l,free);
list_free(r,free);
list_free(g,free);

return x;
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * debug function
 *
 */
void mlmatch_print_results(list_t*res,char* str)
{
if(res != NULL)
	{
	printf("{");
	mlmatch_print_results_aux((list_t*)res->data,str);
	printf("}\n");
	mlmatch_print_results(res->next,str);
	}
}

/***  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  *  **
 * freedom!! 
 *
 */
void mlmatch_free_results(list_t*res)
{
list_free(res,free_sublist);
}

/* eof */


syntax highlighted by Code2HTML, v. 0.9.1