/*
 *  libzvbi - Teletext page cache search functions
 *
 *  Copyright (C) 2000, 2001, 2002 Michael H. Schimek
 *  Copyright (C) 2000, 2001 Iņaki G. Etxebarria
 *
 *  Based on code from AleVT 1.5.1
 *  Copyright (C) 1998, 1999 Edgar Toernig <froese@gmx.de>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/* $Id: search.c,v 1.12 2006/05/18 16:52:25 mschimek Exp $ */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "lang.h"
#include "cache.h"
#include "search.h"
#include "ure.h"
#include "vbi.h"

/**
 * @addtogroup Search
 * @ingroup Cache
 * @brief Search the Teletext page cache.
 */

#if defined(HAVE_GLIBC21) || defined(HAVE_LIBUNICODE)

struct vbi_search {
	vbi_decoder *		vbi;

	int			start_pgno;
	int			start_subno;
	int			stop_pgno[2];
	int			stop_subno[2];
	int			row[2], col[2];

	int			dir;

	vbi_bool		(* progress)(vbi_page *pg);

	vbi_page		pg;

	ure_buffer_t		ub;
	ure_dfa_t		ud;
	ucs2_t			haystack[25 * (40 + 1) + 1];
};

#define SEPARATOR 0x000A

#define FIRST_ROW 1
#define LAST_ROW 24

static void
highlight(struct vbi_search *s, vt_page *vtp,
	  ucs2_t *first, long ms, long me)
{
	vbi_page *pg = &s->pg;
	ucs2_t *hp;
	int i, j;

	hp = s->haystack;

	s->start_pgno = vtp->pgno;
	s->start_subno = vtp->subno;
	s->row[0] = LAST_ROW + 1;
	s->col[0] = 0;

	for (i = FIRST_ROW; i < LAST_ROW; i++) {
		vbi_char *acp = &pg->text[i * pg->columns];

		for (j = 0; j < 40; acp++, j++) {
			int offset = hp - first;
 
			if (offset >= me) {
				s->row[0] = i;
				s->col[0] = j;
				return;
			}

			if (offset < ms) {
				if (j == 39) {
					s->row[1] = i + 1;
					s->col[1] = 0;
				} else {
					s->row[1] = i;
					s->col[1] = j + 1;
				}
			}

			switch (acp->size) {
			case VBI_DOUBLE_SIZE:
				if (offset >= ms) {
					acp[pg->columns].foreground = 32 + VBI_BLACK;
					acp[pg->columns].background = 32 + VBI_YELLOW;
					acp[pg->columns + 1].foreground = 32 + VBI_BLACK;
					acp[pg->columns + 1].background = 32 + VBI_YELLOW;
				}

				/* fall through */

			case VBI_DOUBLE_WIDTH:
				if (offset >= ms) {
					acp[0].foreground = 32 + VBI_BLACK;
					acp[0].background = 32 + VBI_YELLOW;
					acp[1].foreground = 32 + VBI_BLACK;
					acp[1].background = 32 + VBI_YELLOW;
				}

				hp++;
				acp++;
				j++;

				break;

			case VBI_DOUBLE_HEIGHT:
				if (offset >= ms) {
					acp[pg->columns].foreground = 32 + VBI_BLACK;
					acp[pg->columns].background = 32 + VBI_YELLOW;
				}

				/* fall through */

			case VBI_NORMAL_SIZE:
				if (offset >= ms) {
					acp[0].foreground = 32 + VBI_BLACK;
					acp[0].background = 32 + VBI_YELLOW;
				}

				hp++;
				break;

			default:
				/* skipped */
				/* hp++; */
				break;
			}
		}

		hp++;
	}
}

static int
search_page_fwd(void *p, vt_page *vtp, vbi_bool wrapped)
{
	vbi_search *s = p;
	vbi_char *acp;
	int row, _this, start, stop;
	ucs2_t *hp, *first;
	unsigned long ms, me;
	int flags, i, j;

	_this = (vtp->pgno << 16) + vtp->subno;
	start = (s->start_pgno << 16) + s->start_subno;
	stop  = (s->stop_pgno[0] << 16) + s->stop_subno[0];

	if (start >= stop) {
		if (wrapped && _this >= stop)
			return -1; /* all done, abort */
	} else if (_this < start || _this >= stop)
		return -1; /* all done, abort */

	if (vtp->function != PAGE_FUNCTION_LOP)
		return 0; /* try next */

	if (!vbi_format_vt_page(s->vbi, &s->pg, vtp, s->vbi->vt.max_level, 25, 1))
		return -3; /* formatting error, abort */

	if (s->progress)
		if (!s->progress(&s->pg)) {
			if (_this != start) {
				s->start_pgno = vtp->pgno;
				s->start_subno = vtp->subno;
				s->row[0] = FIRST_ROW;
				s->row[1] = LAST_ROW + 1;
				s->col[0] = s->col[1] = 0;
			}

			return -2; /* canceled */
		}

	/* To Unicode */

	hp = s->haystack;
	first = hp;
	row = (_this == start) ? s->row[0] : -1;
	flags = 0;

	if (row > LAST_ROW)
		return 0; /* try next page */

	for (i = FIRST_ROW; i < LAST_ROW; i++) {
		acp = &s->pg.text[i * s->pg.columns];

		for (j = 0; j < 40; acp++, j++) {
			if (i == row && j <= s->col[0])
				first = hp;

			if (acp->size == VBI_DOUBLE_WIDTH
			    || acp->size == VBI_DOUBLE_SIZE) {
				/* "ZZAAPPZILLA" -> "ZAPZILLA" */
				acp++; /* skip left half */
				j++;
			} else if (acp->size > VBI_DOUBLE_SIZE) {
				/* skip */
				/* *hp++ = 0x0020; */
				continue;
			}

			*hp++ = acp->unicode;
			flags = URE_NOTBOL;
		}

		*hp++ = SEPARATOR;
		flags = 0;
	}

	/* Search */

	if (first >= hp)
		return 0; /* try next page */
/*
fprintf(stderr, "exec: %x/%x; start %d,%d; %c%c%c...\n",
	vtp->pgno, vtp->subno,
	s->row[0], s->col[0],
	_vbi_to_ascii (first[0]),
	_vbi_to_ascii (first[1]),
	_vbi_to_ascii (first[2])
);
*/
	if (!ure_exec(s->ud, flags, first, hp - first, &ms, &me))
		return 0; /* try next page */

	highlight(s, vtp, first, ms, me);

	return 1; /* success, abort */
}

static int
search_page_rev(void *p, vt_page *vtp, vbi_bool wrapped)
{
	vbi_search *s = p;
	vbi_char *acp;
	int row, this, start, stop;
	unsigned long ms, me;
	ucs2_t *hp;
	int flags, i, j;

	this  = (vtp->pgno << 16) + vtp->subno;
	start = (s->start_pgno << 16) + s->start_subno;
	stop  = (s->stop_pgno[1] << 16) + s->stop_subno[1];

	if (start <= stop) {
		if (wrapped && this <= stop)
			return -1; /* all done, abort */
	} else if (this > start || this <= stop)
		return -1; /* all done, abort */

	if (vtp->function != PAGE_FUNCTION_LOP)
		return 0; /* try next page */

	if (!vbi_format_vt_page(s->vbi, &s->pg, vtp, s->vbi->vt.max_level, 25, 1))
		return -3; /* formatting error, abort */

	if (s->progress)
		if (!s->progress(&s->pg)) {
			if (this != start) {
				s->start_pgno = vtp->pgno;
				s->start_subno = vtp->subno;
				s->row[0] = FIRST_ROW;
				s->row[1] = LAST_ROW + 1;
				s->col[0] = s->col[1] = 0;
			}

			return -2; /* canceled */
		}

	/* To Unicode */

	hp = s->haystack;
	row = (this == start) ? s->row[1] : 100;
	flags = 0;

	if (row < FIRST_ROW)
		goto break2;

	for (i = FIRST_ROW; i < LAST_ROW; i++) {
		acp = &s->pg.text[i * s->pg.columns];

		for (j = 0; j < 40; acp++, j++) {
			if (i == row && j >= s->col[1])
				goto break2;

			if (acp->size == VBI_DOUBLE_WIDTH
			    || acp->size == VBI_DOUBLE_SIZE) {
				/* "ZZAAPPZILLA" -> "ZAPZILLA" */
				acp++; /* skip left half */
				j++;
			} else if (acp->size > VBI_DOUBLE_SIZE) {
				/* skip */
				/* *hp++ = 0x0020; */
				continue;
			}

			*hp++ = acp->unicode;
			flags = URE_NOTEOL;
		}

		*hp++ = SEPARATOR;
		flags = 0;
	}
break2:

	if (hp <= s->haystack)
		return 0; /* try next page */

	/* Search */

	ms = me = 0;

	for (i = 0; s->haystack + me < hp; i++) {
		unsigned long ms1, me1;
/*
fprintf(stderr, "exec: %x/%x; %d, %d; '%c%c%c...'\n",
	vtp->pgno, vtp->subno, i, me,
	_vbi_to_ascii (s->haystack[me + 0]),
	_vbi_to_ascii (s->haystack[me + 1]),
	_vbi_to_ascii (s->haystack[me + 2])
);
*/
		if (!ure_exec(s->ud, (me > 0) ? (flags | URE_NOTBOL) : flags,
		    s->haystack + me, hp - s->haystack - me, &ms1, &me1))
			break;

		ms = me + ms1;
		me = me + me1;
	}

	if (i == 0)
		return 0; /* try next page */

	highlight(s, vtp, s->haystack, ms, me);

	return 1; /* success, abort */
}

/**
 * @param search vbi_search context.
 * 
 * Delete the search context created by vbi_search_new().
 */
void
vbi_search_delete(vbi_search *search)
{
	if (!search)
		return;

	if (search->ud)
		ure_dfa_free(search->ud);

	if (search->ub)
		ure_buffer_free(search->ub);

	free(search);
}

static size_t
ucs2_strlen(const void *string)
{
	const ucs2_t *p = (ucs2_t *) string;
	size_t i = 0;

	if (!string)
		return 0;

	for (i = 0; *p; i++)
		p++;

	return i;
}

/**
 * @param vbi Initialized vbi decoding context.
 * @param pgno 
 * @param subno Page and subpage number of the first (forward) or
 *   last (backward) page to visit. Optional @c VBI_ANY_SUBNO. 
 * @param pattern The Unicode (UCS-2, <em>not</em> UTF-16) search
 *   pattern, a 0-terminated string.
 * @param casefold Boolean, search case insensitive.
 * @param regexp Boolean, the search pattern is a regular expression.
 * @param progress A function called for each page scanned, can be
 *   \c NULL. Shall return @c FALSE to abort the search. @a pg is valid
 *   for display (e. g. @a pg->pgno), do <em>not</em> call
 *   vbi_unref_page() or modify this page.
 * 
 * Allocate a vbi_search context and prepare for searching
 * the Teletext page cache. The context must be freed with
 * vbi_search_delete().
 * 
 * Regular expression searching supports the standard set
 * of operators and constants, with these extensions:
 *
 * <table>
 * <tr><td>\x....</td><td>hexadecimal number of up to 4 digits</td></tr>
 * <tr><td>\X....</td><td>hexadecimal number of up to 4 digits</td></tr>
 * <tr><td>\u....</td><td>hexadecimal number of up to 4 digits</td></tr>
 * <tr><td>\U....</td><td>hexadecimal number of up to 4 digits</td></tr>
 * <tr><td>:title:</td><td>Unicode specific character class</td></tr>
 * <tr><td>:gfx:</td><td>Teletext G1 or G3 graphics</td></tr>
 * <tr><td>:drcs:</td><td>Teletext DRCS</td></tr>
 * <tr><td>\pN1,N2,...,Nn</td><td>Character properties class</td></tr>
 * <tr><td>\PN1,N2,...,Nn</td><td>Negated character properties class</td></tr>
 * </table>
 *
 * <table>
 * <tr><td><b>N</b></td><td><b>Property</b></td></tr>
 * <tr><td>1</td><td>alphanumeric</td></tr>
 * <tr><td>2</td><td>alpha</td></tr>
 * <tr><td>3</td><td>control</td></tr>
 * <tr><td>4</td><td>digit</td></tr>
 * <tr><td>5</td><td>graphical</td></tr>
 * <tr><td>6</td><td>lowercase</td></tr>
 * <tr><td>7</td><td>printable</td></tr>
 * <tr><td>8</td><td>punctuation</td></tr>
 * <tr><td>9</td><td>space</td></tr>
 * <tr><td>10</td><td>uppercase</td></tr>
 * <tr><td>11</td><td>hex digit</td></tr>
 * <tr><td>12</td><td>title</td></tr>
 * <tr><td>13</td><td>defined</td></tr>
 * <tr><td>14</td><td>wide</td></tr>
 * <tr><td>15</td><td>nonspacing</td></tr>
 * <tr><td>16</td><td>Teletext G1 or G3 graphics</td></tr>
 * <tr><td>17</td><td>Teletext DRCS</td></tr>
 * </table>
 *
 * Character classes can contain literals, constants, and character
 * property classes. Example: [abc\U10A\p1,3,4]. Note double height
 * and size characters will match twice, on the upper and lower row,
 * and double width and size characters count as one (reducing the
 * line width) so one can find combinations of normal and enlarged
 * characters.
 *
 * @bug
 * In a multithreaded application the data service decoder may receive
 * and cache new pages during a search session. When these page numbers
 * have been visited already the pages are not searched. At a channel
 * switch (and in future at any time) pages can be removed from cache.
 * All this has yet to be addressed.
 *
 * @return
 * A vbi_search context or @c NULL on error.
 */
vbi_search *
vbi_search_new(vbi_decoder *vbi,
	       vbi_pgno pgno, vbi_subno subno,
	       uint16_t *pattern,
	       vbi_bool casefold, vbi_bool regexp,
	       int (* progress)(vbi_page *pg))
{
	vbi_search *s;
	ucs2_t *esc_pat = NULL;
	int i, j, pat_len = ucs2_strlen(pattern);

	if (pat_len <= 0)
		return NULL;

	if (!(s = calloc(1, sizeof(*s))))
		return NULL;

	if (!regexp) {
		if (!(esc_pat = malloc(sizeof(ucs2_t) * pat_len * 2))) {
			free(s);
			return NULL;
		}

		for (i = j = 0; i < pat_len; i++) {
			if (strchr("!\"#$%&()*+,-./:;=?@[\\]^_{|}~", pattern[i]))
				esc_pat[j++] = '\\';
			esc_pat[j++] = pattern[i];
		}

		pattern = esc_pat;
		pat_len = j;
	}

	if (!(s->ub = ure_buffer_create()))
		goto abort;

	if (!(s->ud = ure_compile(pattern, pat_len, casefold, s->ub))) {
abort:
		vbi_search_delete(s);

		if (!regexp)
			free(esc_pat);

		return NULL;
	}

	if (!regexp)
		free(esc_pat);

	s->stop_pgno[0] = pgno;
	s->stop_subno[0] = (subno == VBI_ANY_SUBNO) ? 0 : subno;

	if (subno <= 0) {
		s->stop_pgno[1] = (pgno <= 0x100) ? 0x8FF : pgno - 1;
		s->stop_subno[1] = 0x3F7E;
	} else {
		s->stop_pgno[1] = pgno;

		if ((subno & 0x7F) == 0)
			s->stop_subno[1] = (subno - 0x100) | 0x7E;
		else
			s->stop_subno[1] = subno - 1;
	}

	s->vbi = vbi;
	s->progress = progress;

	return s;
}

/**
 * @param search Initialized search context.
 * @param pg Place to store the formatted (as with vbi_fetch_vt_page())
 *   Teletext page containing the found pattern. Do <em>not</em>
 *   call vbi_unref_page() for this page. Also the page must not
 *   be modified. See vbi_search_status for semantics.
 * @param dir Search direction +1 forward or -1 backward.
 *
 * Find the next occurence of the search pattern.
 *
 * @return
 * vbi_search_status.
 */
/* XXX fix return type */
int
vbi_search_next(vbi_search *search, vbi_page **pg, int dir)
{
	*pg = NULL;
	dir = (dir > 0) ? +1 : -1;

	if (!search->dir) {
		search->dir = dir;

		if (dir > 0) {
			search->start_pgno = search->stop_pgno[0];
			search->start_subno = search->stop_subno[0];
		} else {
			search->start_pgno = search->stop_pgno[1];
			search->start_subno = search->stop_subno[1];
		}

		search->row[0] = FIRST_ROW;
		search->row[1] = LAST_ROW + 1;
		search->col[0] = search->col[1] = 0;
	}
#if 1 /* should switch to a 'two frontiers meet' model, but ok for now */
	else if (dir != search->dir) {
		search->dir = dir;

		search->stop_pgno[0] = search->start_pgno;
		search->stop_subno[0] = (search->start_subno == VBI_ANY_SUBNO) ?
			0 : search->start_subno;
		search->stop_pgno[1] = search->start_pgno;
		search->stop_subno[1] = search->start_subno;
	}
#endif
	switch (vbi_cache_foreach(search->vbi, search->start_pgno, search->start_subno, dir,
		 (dir > 0) ? search_page_fwd : search_page_rev, search)) {
	case 1:
		*pg = &search->pg;
		return VBI_SEARCH_SUCCESS;

	case 0:
		return VBI_SEARCH_CACHE_EMPTY;

	case -1:
		search->dir = 0;
		return VBI_SEARCH_NOT_FOUND;

	case -2:
		return VBI_SEARCH_CANCELED;

	default:
		break;
	}

	return VBI_SEARCH_ERROR;
}

#else /* !HAVE_GLIBC21 && !HAVE_LIBUNICODE */

vbi_search *
vbi_search_new(vbi_decoder *vbi,
	       vbi_pgno pgno, vbi_subno subno,
	       uint16_t *pattern,
	       vbi_bool casefold, vbi_bool regexp,
	       int (* progress)(vbi_page *pg))
{
	return NULL;
}

int
vbi_search_next(vbi_search *search, vbi_page **pg, int dir)
{
	return VBI_SEARCH_ERROR;
}

void
vbi_search_delete(vbi_search *search)
{
}

#endif /* !HAVE_GLIBC21 && !HAVE_LIBUNICODE */


syntax highlighted by Code2HTML, v. 0.9.1