//  crm114_osbf.h  - Controllable Regex Mutilator,  version v1.0
//  Copyright 2001-2004  William S. Yerazunis, all rights reserved.
//
//  This software is licensed to the public under the Free Software
//  Foundation's GNU GPL, version 2.  You may obtain a copy of the
//  GPL by visiting the Free Software Foundations web site at
//  www.fsf.org, and a copy is included in this distribution.
//
//  This file defines CSS header structure, data and constants used
//  by the OSBF-Bayes classifier.  -- Fidelis Assis - 2004/10/20
//

/*
 * One feature bucket of an OSBF CSS file.
 *
 * A bucket is identified by the (hash, key) pair -- see
 * BUCKET_HASH_COMPARE.  The value word is split by the masks defined in
 * this header: BUCKET_VALUE_MASK selects the feature count and
 * BUCKET_LOCK_MASK selects the lock flag.
 */
typedef struct {
  unsigned long hash;   /* first half of the bucket identity */
  unsigned long key;    /* second half of the bucket identity */
  unsigned long value;  /* feature count plus lock bit */
} OSBF_FEATUREBUCKET_STRUCT;

/*
 * Leading fields of the CSS file header.  OSBF_HEADER_UNION pads this
 * structure out to the full header size.
 */
typedef struct {
  /* NOTE(review): presumably holds one of the *_VERSION codes -- confirm */
  unsigned char version[4];
  /* flag bits; their meaning is not defined in this header */
  unsigned long flags;
  /* offset of the first bucket, counted in bucket-size units */
  unsigned long buckets_start;
  /* how many buckets the file contains */
  unsigned long buckets;
  /* how many trainings have been executed */
  unsigned long learnings;
} OSBF_FEATURE_HEADER_STRUCT;

/*
 * Header size expressed in bucket-size units: the number of whole buckets
 * that fit in 4096 bytes.  This keeps the header at approximately 4 Kbytes
 * while guaranteeing it is an exact multiple of the bucket size.
 */
#define OSBF_CSS_SPECTRA_START (4096 / sizeof (OSBF_FEATUREBUCKET_STRUCT))

/*
 * Complete header.  The `bih` ("buckets in header") member does not hold
 * real buckets; it exists only to pad the union to
 * OSBF_CSS_SPECTRA_START bucket-sized slots.
 */
typedef union {
  OSBF_FEATURE_HEADER_STRUCT header;
  OSBF_FEATUREBUCKET_STRUCT bih[OSBF_CSS_SPECTRA_START];
} OSBF_HEADER_UNION;

/*
 * Bucket access macros.
 *
 * `bucket` is any expression that designates a structure with `hash`,
 * `key` and `value` members (e.g. `b`, `*p`, `table[i]`); `header` is any
 * expression yielding a pointer to a structure with a `buckets` member.
 *
 * Every macro argument and every expansion is fully parenthesized so the
 * macros behave correctly with arguments such as `*p` or `&hdr` and when
 * used inside larger expressions.  Each macro evaluates its `bucket`
 * argument exactly once.
 */

/* bits of `value` that carry the feature count */
#define BUCKET_VALUE_MASK 0x0000FFFFLU
/* bit of `value` that marks the bucket as locked */
#define BUCKET_LOCK_MASK  0x80000000LU

/* plain member accessors (usable as lvalues) */
#define BUCKET_HASH(bucket) ((bucket).hash)
#define BUCKET_KEY(bucket) ((bucket).key)
#define BUCKET_RAW_VALUE(bucket) ((bucket).value)

/* non-zero when bucket_idx is below the bucket count stored in *header */
#define VALID_BUCKET(header, bucket_idx) ((bucket_idx) < (header)->buckets)

/* feature count with the lock bit (and any other high bit) stripped */
#define GET_BUCKET_VALUE(bucket) (((bucket).value) & BUCKET_VALUE_MASK)
/* non-zero when the lock bit is set */
#define BUCKET_IS_LOCKED(bucket) (((bucket).value) & BUCKET_LOCK_MASK)

/* store val and set the lock bit in the same assignment */
#define SETL_BUCKET_VALUE(bucket, val) \
  ((bucket).value = (val) | BUCKET_LOCK_MASK)
/* store val as the whole value word (no masking is applied) */
#define SET_BUCKET_VALUE(bucket, val) ((bucket).value = (val))
/* set the lock bit, leaving all other bits untouched */
#define LOCK_BUCKET(bucket) ((bucket).value |= BUCKET_LOCK_MASK)
/* keep only the BUCKET_VALUE_MASK bits; this clears the lock bit */
#define UNLOCK_BUCKET(bucket) ((bucket).value &= BUCKET_VALUE_MASK)

/* a bucket with a non-zero count is part of a chain; zero means empty */
#define BUCKET_IN_CHAIN(bucket) (GET_BUCKET_VALUE(bucket) != 0)
#define EMPTY_BUCKET(bucket) (GET_BUCKET_VALUE(bucket) == 0)

/* true when both the hash and the key of the bucket match (h, k) */
#define BUCKET_HASH_COMPARE(bucket, h, k) \
  (((bucket).hash) == (h) && ((bucket).key) == (k))

/*
 * CSS file version codes.  The values are consecutive because they are
 * also used as indexes into CSS_version_name[].
 */
#define SBPH_VERSION        0
#define OSB_VERSION         1
#define CORRELATE_VERSION   2
#define NEURAL_VERSION      3
#define OSB_WINNOW_VERSION  4
#define OSBF_VERSION        5
#define UNKNOWN_VERSION     6

/*
 * Table of pointers to the CSS version names; index it with one of the
 * version codes above.  The table itself is defined in
 * crm_osbf_maintenance.c.
 */
extern char *CSS_version_name[];

/* largest feature count a bucket may hold (equals BUCKET_VALUE_MASK) */
#define OSBF_FEATUREBUCKET_VALUE_MAX              65535

/*
 * default length of a sparse spectrum file
 * NOTE(review): presumably counted in buckets -- confirm against callers
 */
#define OSBF_DEFAULT_SPARSE_SPECTRUM_FILE_LENGTH  94321

/* chain length limit; when enabled, microgrooming starts past this length */
#define OSBF_MICROGROOM_CHAIN_LENGTH              29

/* upper bound on the number of buckets zeroed by one grooming */
#define OSBF_MICROGROOM_STOP_AFTER                128

/* lowest allowed ratio between the max and the min P(F|C) */
#define OSBF_MIN_PMAX_PMIN_RATIO                  1

/* token size above which long tokens start being "accumulated" */
#define OSBF_MAX_TOKEN_SIZE                       60

/* hashes are accumulated for at most this many long tokens */
#define OSBF_MAX_LONG_TOKENS                      1000

/*
 * Learn and classify entry points.  Both take the same argument list: the
 * CSL cell, the parsed argument block, and a text buffer described by
 * pointer, offset and length.
 * NOTE(review): the meaning of the int result is defined by the
 * implementations, which are not visible in this header.
 */
extern int crm_expr_osbf_bayes_learn (CSL_CELL *csl, ARGPARSE_BLOCK *apb,
                                      char *txtptr, long txtoffset,
                                      long txtlen);
extern int crm_expr_osbf_bayes_classify (CSL_CELL *csl, ARGPARSE_BLOCK *apb,
                                         char *txtptr, long txtoffset,
                                         long txtlen);

/*
 * Microgrooming and packing helpers.
 * NOTE(review): presumably `h` is the header of a loaded CSS file and
 * packstart/packlen are expressed in buckets -- confirm in the .c file.
 */
extern void crm_osbf_set_microgroom (int value);
extern void crm_osbf_microgroom (OSBF_FEATURE_HEADER_STRUCT *h,
                                 unsigned long hindex);
extern void crm_osbf_packcss (OSBF_FEATURE_HEADER_STRUCT *h,
                              unsigned long packstart,
                              unsigned long packlen);
extern void crm_osbf_packseg (OSBF_FEATURE_HEADER_STRUCT *h,
                              unsigned long packstart,
                              unsigned long packlen);

/*
 * Bucket navigation, lookup and modification.  Each function receives the
 * CSS header plus a bucket index and/or a (hash, key) pair.
 * NOTE(review): wrap-around and not-found conventions are not visible
 * here -- check the definitions before relying on them.
 */
extern unsigned long crm_osbf_next_bindex (OSBF_FEATURE_HEADER_STRUCT *header,
                                           unsigned long index);
extern unsigned long crm_osbf_prev_bindex (OSBF_FEATURE_HEADER_STRUCT *header,
                                           unsigned long index);
extern unsigned long crm_osbf_find_bucket (OSBF_FEATURE_HEADER_STRUCT *header,
                                           unsigned long hash,
                                           unsigned long key);
extern void crm_osbf_update_bucket (OSBF_FEATURE_HEADER_STRUCT *header,
                                    unsigned long bindex, int delta);
extern void crm_osbf_insert_bucket (OSBF_FEATURE_HEADER_STRUCT *header,
                                    unsigned long bindex,
                                    unsigned long hash, unsigned long key,
                                    int value);

/*
 * CSS file creation from a file name, a bucket count, a major/minor pair
 * and a spectrum start value.
 * NOTE(review): units of spectrum_start and the meaning of the int result
 * are not shown in this header.
 */
extern int crm_osbf_create_cssfile (char *cssfile, unsigned long buckets,
                                    unsigned long major, unsigned long minor,
                                    unsigned long spectrum_start);



/* syntax highlighted by Code2HTML, v. 0.9.1 */