(***********************************************************************)
(*                                                                     *)
(*                 SpamOracle -- a Bayesian spam filter                *)
(*                                                                     *)
(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
(*                                                                     *)
(*  Copyright 2002 Institut National de Recherche en Informatique et   *)
(*  en Automatique. This file is distributed under the terms of the    *)
(*  GNU Public License version 2, http://www.gnu.org/licenses/gpl.txt  *)
(*                                                                     *)
(***********************************************************************)

(* $Id: htmlscan.mli,v 1.1 2003/01/02 08:52:15 xleroy Exp $ *)

(** Approximate HTML scanner.  Extracts words and certain parameters of
    certain tags (e.g. URLs) from HTML text. *)

(** [extract_text s] maps a string to a string.

    Going by the module description above, [s] is presumably HTML text
    and the result presumably holds the words and selected tag
    parameters (e.g. URLs) found in it.  The exact layout of the
    returned string (separators, ordering, handling of malformed
    markup) is decided by the implementation and cannot be determined
    from this interface -- TODO confirm against the implementation. *)
val extract_text: string -> string