/* * search.c: Foreground & background search in videotext-pages * * $Id: search.c,v 1.2 1997/05/03 22:17:30 mb Exp mb $ * * Copyright (c) 1994-96 Martin Buck * Read COPYING for more information * */ #include #include #include #include /* Required by regex.h */ #include #include #include "safe_malloc.h" #include "vtx_assert.h" #include "vtxdecode.h" #include "cache.h" #include "spool.h" #include "misc.h" #include "search.h" typedef enum { AR_ADD, AR_REP, AR_REPNEW, AR_NOREP, AR_MAXRES } addresult_stat_t; typedef struct searchreq_s { int reqid; char *str; regex_t *regex; search_result_t **result; unsigned fg : 1; unsigned case_sens : 1; unsigned whole_words : 1; unsigned multi_line : 1; unsigned bell : 1; unsigned keep : 1; unsigned read : 1; } searchreq_t; static searchreq_t *searchreq; static int req_count; static char regerr_msg[512] = "Regular expression error: "; static char *regerr_str = regerr_msg + sizeof("Regular expression error: ") - 1; static void page2searchstr(const vtxpage_t *page, char *pagestr, int lowercase) { int pos, ofs; const char *chconv_table; chconv_table = lowercase ? vtx2iso_lc_table : vtx2iso_table; ofs = 0; for (pos = 0; pos < VTX_PAGESIZE; pos++) { pagestr[ofs++] = chconv_table[page->chr[pos]]; if (pos % 40 == 39) { pagestr[ofs++] = '\n'; } } pagestr[ofs] = '\0'; } static int addmatch(int start, int end, int whole_words, const char *page, search_result_t **result) { if (whole_words) { if ((start % 41) && !strchr(vtxiso_worddelim, page[start - 1]) && !strchr(vtxiso_worddelim, page[start])) { return FALSE; } if ((end % 41) != 0 && (end % 41) != 40 && !strchr(vtxiso_worddelim, page[end - 1]) && !strchr(vtxiso_worddelim, page[end])) { return FALSE; } } if (!*result) { *result = smalloc(sizeof(search_result_t)); (*result)->count = 0; (*result)->read = FALSE; (*result)->match = NULL; } (*result)->match = srealloc((*result)->match, ((*result)->count + 1) * sizeof(search_result_elem_t)); /* We don't want to count the LF's in our results */ if (start % 41 == 40) { start--; } start -= start / 41; if (end % 41 == 40) { end--; } end -= end / 41; (*result)->match[(*result)->count].ofs = start; (*result)->match[(*result)->count].len = end - start; (*result)->count++; return TRUE; } static search_result_t * dosearch(const byte_t *pgbuf, const vtx_pageinfo_t *pginf, const char *pagestr, int case_sens, const searchreq_t *req, const char **errmsg) { vtxpage_t tmppage; char tmp_pagestr[VTX_PAGESIZE + 25]; search_result_t *result = NULL; char *hit; const char *searchstart; int regerr, len; regmatch_t rm; if (!pagestr) { tmppage.info = *pginf; decode_page(pgbuf, &tmppage, 0, 23); page2searchstr(&tmppage, tmp_pagestr, !case_sens); pagestr = tmp_pagestr; } searchstart = pagestr; if (req->regex) { while (searchstart - pagestr < VTX_PAGESIZE + 24 && !(regerr = regexec(req->regex, searchstart, 1, &rm, REG_NOTBOL * (searchstart != pagestr)))) { if (rm.rm_eo > rm.rm_so && addmatch(searchstart - pagestr + rm.rm_so, searchstart - pagestr + rm.rm_eo, req->whole_words, pagestr, &result)) { searchstart += rm.rm_eo; } else { searchstart += rm.rm_so + 1; } } if (regerr && regerr != REG_NOMATCH) { regerror(regerr, req->regex, regerr_str, sizeof(regerr_msg) - (regerr_str - regerr_msg)); *errmsg = regerr_msg; } } else { len = strlen(req->str); while (searchstart - pagestr < VTX_PAGESIZE + 24 && (hit = strstr(searchstart, req->str))) { if (addmatch(hit - pagestr, hit - pagestr + len, req->whole_words, pagestr, &result)) { searchstart = hit + len; } else { searchstart = hit + 1; } } } if (result) { memcpy(result->pgbuf, pgbuf, VTX_PAGESIZE); result->pginf = *pginf; } return result; } static int mkreq(searchreq_t *req, const search_data_t *dat, int fg, const char **errmsg) { int regerr; char *str, *chrptr; if (dat->str[0] == '\0') { *errmsg = "Invalid search expression."; return FALSE; } req->str = sstrdup(dat->str); /* Convert search expression to lower case, taking ISO 8859-1 characters representable in * videotext into account. We don't use tolower() to be independent of correct locale- * support. */ if (!dat->case_sens) { for (str = req->str; *str; str++) { if ((chrptr = strchr(vtx2iso_table, *str))) { *str = vtx2iso_lc_table[chrptr - vtx2iso_table]; } } } if (dat->is_regexp) { req->regex = smalloc(sizeof(regex_t)); if ((regerr = regcomp(req->regex, req->str, REG_EXTENDED | (REG_NEWLINE * !dat->multi_line)))) { regerror(regerr, req->regex, regerr_str, sizeof(regerr_msg) - (regerr_str - regerr_msg)); regfree(req->regex); *errmsg = regerr_msg; free(req->regex); return FALSE; } } else { req->regex = NULL; } req->fg = !!fg; req->case_sens = dat->case_sens; req->whole_words = dat->whole_words; req->multi_line = dat->multi_line; req->bell = dat->bell; req->keep = FALSE; req->read = FALSE; req->result = NULL; return TRUE; } static int find_req(int reqid) { int entry; for (entry = 0; entry < req_count; entry++) { if (searchreq[entry].reqid == reqid) { return entry; } } return -1; } static addresult_stat_t addresult(searchreq_t *req, search_result_t *result, int replace) { addresult_stat_t retval; search_result_t *old_result; int rescount, newent, inspos, loop, oldloop; old_result = NULL; newent = -1; rescount = inspos = 0; if (req->result) { for (rescount = 0; req->result[rescount]; rescount++) { if (req->result[rescount]->pginf.pagenum == result->pginf.pagenum && req->result[rescount]->pginf.minute == result->pginf.minute) { if (!replace) { return AR_NOREP; } newent = rescount; old_result = req->result[rescount]; break; } else if (req->result[rescount]->pginf.pagenum < result->pginf.pagenum || (req->result[rescount]->pginf.pagenum == result->pginf.pagenum && req->result[rescount]->pginf.minute < result->pginf.minute)) { inspos = rescount + 1; } } } if (newent < 0) { if (++rescount > SEARCH_MAXRESULTS) { return AR_MAXRES; } req->result = srealloc(req->result, (rescount + 1) * sizeof(search_result_t *)); memmove(req->result + inspos + 1, req->result + inspos, (rescount - inspos - 1) * sizeof(search_result_t *)); req->result[rescount] = NULL; newent = inspos; retval = AR_ADD; } else { if (old_result) { retval = AR_REP; result->read = old_result->read; for (loop = 0; loop < result->count; loop++) { for (oldloop = 0; oldloop < old_result->count; oldloop++) { if (result->match[loop].ofs == old_result->match[oldloop].ofs && result->match[loop].len == old_result->match[oldloop].len) { break; } } if (oldloop >= old_result->count) { retval = AR_REPNEW; result->read = FALSE; break; } } free(old_result->match); free(old_result); } else { retval = AR_REPNEW; } } req->result[newent] = result; if (retval == AR_ADD || retval == AR_REPNEW) { req->read = FALSE; } return retval; } int search_page(const byte_t *pgbuf, const vtx_pageinfo_t *pginf, const search_data_t *dat, int level, const char **errmsg) { searchreq_t *req; search_result_t *tmpres; int reqid, addstat; byte_t tmppgbuf[VTX_VIRTUALSIZE]; vtx_pageinfo_t tmppginf; if ((reqid = search_add_req(dat, TRUE, errmsg)) < 0) { return -1; } req = &searchreq[find_req(reqid)]; if ((tmpres = dosearch(pgbuf, pginf, NULL, dat->case_sens, req, errmsg))) { if ((addstat = addresult(req, tmpres, FALSE)) != AR_ADD) { free(tmpres->match); free(tmpres); if (addstat == AR_MAXRES) { return reqid; } } } if (level >= 1) { cache_loop_t loop; loop.page = -1; while (cache_loop(&loop, tmppgbuf, &tmppginf)) { if ((tmppginf.pagenum != pginf->pagenum || tmppginf.minute != pginf->minute) && (tmpres = dosearch(tmppgbuf, &tmppginf, NULL, dat->case_sens, req, errmsg))) { if ((addstat = addresult(req, tmpres, FALSE)) != AR_ADD) { free(tmpres->match); free(tmpres); if (addstat == AR_MAXRES) { return reqid; } } } } } if (level >= 2) { spool_dirent_t *spdir, *spptr; spdir = spptr = spool_getlist(TRUE, TRUE); if (spdir) { while (spptr->page) { if (query_spool(spptr->page, spptr->subpage, tmppgbuf, &tmppginf) && (tmpres = dosearch(tmppgbuf, &tmppginf, NULL, dat->case_sens, req, errmsg))) { if ((addstat = addresult(req, tmpres, FALSE)) != AR_ADD) { free(tmpres->match); free(tmpres); if (addstat == AR_MAXRES) { return reqid; } } } spptr++; } free(spdir); } } return reqid; } int search_add_req(const search_data_t *dat, int fg, const char **errmsg) { int new_reqid; static int curr_reqid, reqalloc_count; new_reqid = ++curr_reqid; if (reqalloc_count <= req_count) { searchreq = srealloc(searchreq, ++reqalloc_count * sizeof(searchreq_t)); } if (!mkreq(&searchreq[req_count], dat, fg, errmsg)) { return -1; } searchreq[req_count].reqid = new_reqid; req_count++; return new_reqid; } int search_rm_req(int reqid) { int entry; entry = find_req(reqid); assert(entry >= 0); if (searchreq[entry].result) { search_free_result(searchreq[entry].result); } if (searchreq[entry].regex) { regfree(searchreq[entry].regex); free(searchreq[entry].regex); } free(searchreq[entry].str); memmove(&searchreq[entry], &searchreq[entry + 1], sizeof(searchreq_t) * (--req_count - entry)); return TRUE; } void search_get_data(int reqid, search_data_t *dat) { int entry; entry = find_req(reqid); assert(entry >= 0); dat->str = searchreq[entry].str; dat->case_sens = searchreq[entry].case_sens; dat->is_regexp = searchreq[entry].regex ? TRUE : FALSE; dat->whole_words = searchreq[entry].whole_words; dat->multi_line = searchreq[entry].multi_line; dat->bell = searchreq[entry].bell; } int search_get_currimg(int reqid) { int entry, retval; entry = find_req(reqid); assert(entry >= 0); if (searchreq[entry].fg) { retval = SEARCH_FG_BITMAPS; } else if (searchreq[entry].keep) { retval = SEARCH_DISK_BITMAPS; } else { retval = SEARCH_BG_BITMAPS; } if (searchreq[entry].read) { retval += SEARCH_READ_BITMAPS; } else if (searchreq[entry].result) { retval += SEARCH_FOUND_BITMAPS; } else { retval += SEARCH_NOFOUND_BITMAPS ; } return retval; } int search_get_fg(int reqid) { int entry; entry = find_req(reqid); assert(entry >= 0); return searchreq[entry].fg; } int search_get_keep(int reqid) { int entry; entry = find_req(reqid); assert(entry >= 0); return searchreq[entry].keep; } void search_set_allread(int reqid) { int entry; entry = find_req(reqid); assert(entry >= 0); searchreq[entry].read = TRUE; } void search_set_keepreq(int reqid) { int entry; entry = find_req(reqid); assert(entry >= 0); searchreq[entry].keep = TRUE; } void search_free_result(search_result_t **result) { search_result_t **resloop = result; if (result) { while (*resloop) { free((*resloop)->match); free(*resloop); resloop++; } free(result); } } search_result_t ** search_get_result(int reqid) { int entry; entry = find_req(reqid); return (entry < 0 ? NULL : searchreq[entry].result); } void search_bg_newpage(const byte_t *pgbuf, const vtx_pageinfo_t *pginf, search_notify_fn notify_fn) { int loop, found_cs, found_ncs, addstat; vtxpage_t tmppage; char pagestr[VTX_PAGESIZE + 25]; searchreq_t *req; search_result_t *tmpres; const char *errmsg; /* Dummy */ found_cs = found_ncs = FALSE; for (loop = 0; loop < req_count; loop++) { if (searchreq[loop].reqid) { if (searchreq[loop].case_sens) { found_cs = TRUE; } else { found_ncs = TRUE; } } } if (!found_cs && !found_ncs) { return; } tmppage.info = *pginf; decode_page(pgbuf, &tmppage, 0, 23); if (found_cs) { page2searchstr(&tmppage, pagestr, FALSE); for (loop = 0; loop < req_count; loop++) { req = &searchreq[loop]; if (req->reqid && req->case_sens && (tmpres = dosearch(pgbuf, pginf, pagestr, TRUE, req, &errmsg))) { if ((addstat = addresult(req, tmpres, TRUE)) == AR_MAXRES) { free(tmpres->match); free(tmpres); return; } notify_fn(req->reqid, req->bell && (addstat == AR_ADD || addstat == AR_REPNEW)); } } } if (found_ncs) { page2searchstr(&tmppage, pagestr, TRUE); for (loop = 0; loop < req_count; loop++) { req = &searchreq[loop]; if (req->reqid && !req->case_sens && (tmpres = dosearch(pgbuf, pginf, pagestr, FALSE, req, &errmsg))) { if ((addstat = addresult(req, tmpres, TRUE)) == AR_MAXRES) { free(tmpres->match); free(tmpres); return; } notify_fn(req->reqid, req->bell && (addstat == AR_ADD || addstat == AR_REPNEW)); } } } }