#ifndef MXTEXTTOOLS_H
#define MXTEXTTOOLS_H
/*
mxTextTools -- Fast text manipulation routines
Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com
*/
/* Name of the extension module. It has to be identical to the suffix
   of the module's init function. */
#define MXTEXTTOOLS_MODULE      "mxTextTools"
#include "mxbmse.h"
#ifdef MXFASTSEARCH
# include "private/mxfse.h"
#endif
/* Include generic mx extension header file */
#include "mxh.h"
/* Linkage decoration used for the type objects declared in this header:
   MXTEXTTOOLS_EXTERNALIZE maps to MX_EXPORT when MX_BUILDING_MXTEXTTOOLS
   is defined and to MX_IMPORT in every other compilation. */
#if defined(MX_BUILDING_MXTEXTTOOLS)
#  define MXTEXTTOOLS_EXTERNALIZE MX_EXPORT
#else
#  define MXTEXTTOOLS_EXTERNALIZE MX_IMPORT
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* --- Text Search Object ---------------------------------------*/

/* Values stored in the algorithm slot of a text search object. */
#define MXTEXTSEARCH_BOYERMOORE         0
#define MXTEXTSEARCH_FASTSEARCH         1
#define MXTEXTSEARCH_TRIVIAL            2
/* Instance layout of a text search object. */
typedef struct {
    PyObject_HEAD
    PyObject *match;            /* Match string object */
    PyObject *translate;        /* Translate string object, or NULL */
    int algorithm;              /* Algorithm to be used; presumably one
                                   of the MXTEXTSEARCH_* values --
                                   confirm against the implementation */
    void *data;                 /* Algorithm-internal data, or NULL */
} mxTextSearchObject;
/* Type object of text search objects. */
MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxTextSearch_Type;

/* Exact type test: non-zero iff the type slot of the object v points
   at mxTextSearch_Type. v must point to a valid Python object; it is
   evaluated exactly once. */
#define mxTextSearch_Check(v) \
    (((PyObject *)(v))->ob_type == &mxTextSearch_Type)
/* The following APIs are exported for mxTextTools internal use only! */

/* Declaration only; the definition is not part of this header.
   Presumably reports the length of the match string of the text search
   object self -- confirm against the implementation. */
extern int mxTextSearch_MatchLength(PyObject *self);
/* Search entry point taking an 8-bit character buffer.

   self                    object to search with
   text                    character buffer to search in
   start, stop             int positions; presumably the slice of text
                           to search -- confirm against the implementation
   sliceleft, sliceright   int output pointers; presumably receive the
                           bounds of the match -- confirm

   The meaning of the int return value is defined by the implementation
   and is not visible in this header. */
extern int mxTextSearch_SearchBuffer(PyObject *self,
                                     char *text,
                                     int start,
                                     int stop,
                                     int *sliceleft,
                                     int *sliceright);
#if defined(HAVE_UNICODE)
/* Py_UNICODE variant of the buffer search entry point; only declared
   when HAVE_UNICODE is defined. The parameter list matches
   mxTextSearch_SearchBuffer() except for the element type of text.
   Semantics of start/stop, sliceleft/sliceright and of the return value
   are defined by the implementation, not by this header. */
extern int mxTextSearch_SearchUnicode(PyObject *self,
                                      Py_UNICODE *text,
                                      int start,
                                      int stop,
                                      int *sliceleft,
                                      int *sliceright);
#endif /* HAVE_UNICODE */
/* --- Character Set Object -------------------------------------*/

/* Values stored in the mode slot of a character set object. */
#define MXCHARSET_8BITMODE      0
#define MXCHARSET_UCS2MODE      1
#define MXCHARSET_UCS4MODE      2
/* Instance layout of a character set object. */
typedef struct {
    PyObject_HEAD
    PyObject *definition;       /* Character set definition */
    int mode;                   /* Operation mode:
                                     0 - 8-bit character lookup
                                     1 - UCS-2 Unicode lookup
                                     2 - UCS-4 Unicode lookup
                                   (same numbers as the MXCHARSET_*MODE
                                   defines) */
    void *lookup;               /* Lookup table */
} mxCharSetObject;
/* Type object of character set objects. */
MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxCharSet_Type;

/* Exact type test: non-zero iff the type slot of the object v points
   at mxCharSet_Type. v must point to a valid Python object; it is
   evaluated exactly once. */
#define mxCharSet_Check(v) \
    (((PyObject *)(v))->ob_type == &mxCharSet_Type)
/* Exporting these APIs for mxTextTools internal use only ! */

/* Membership test entry point for a single 8-bit character.

   self   object to test against; presumably an mxCharSetObject --
          confirm against the implementation
   ch     the 8-bit character

   The meaning of the int return value is defined by the implementation
   and is not visible in this header.

   The parameter is declared without the `register` storage class: this
   header is also consumed by C++ translation units (see the extern "C"
   wrapper), and C++17 removed `register`. A storage class on a
   prototype parameter does not affect the function type, so the
   definition may still use it. */
extern int mxCharSet_ContainsChar(PyObject *self,
                                  unsigned char ch);
#ifdef HAVE_UNICODE
/* Membership test entry point for a single Py_UNICODE character; only
   declared when HAVE_UNICODE is defined.

   self   object to test against; presumably an mxCharSetObject --
          confirm against the implementation
   ch     the Unicode character

   The meaning of the int return value is defined by the implementation
   and is not visible in this header.

   The parameter is declared without the `register` storage class so
   that the prototype is also accepted by C++17 compilers, which no
   longer allow `register`; the function type is unaffected. */
extern int mxCharSet_ContainsUnicodeChar(PyObject *self,
                                         Py_UNICODE ch);
#endif
/* Matching entry point working on a text object.

   self          object to match with
   text          text object to match against
   start, stop   int positions; presumably the slice of text to look
                 at -- confirm against the implementation
   direction     int; presumably selects the matching direction --
                 confirm

   The meaning of the int return value is defined by the implementation
   and is not visible in this header. */
extern int mxCharSet_Match(PyObject *self,
                           PyObject *text,
                           int start,
                           int stop,
                           int direction);
/* --- Tag Table Object -----------------------------------------*/

/* A single entry of a tag table; mxTagTableObject stores an array of
   these. */
typedef struct {
    PyObject *tagobj;           /* Tag object to assign, call, append,
                                   etc., or NULL */
    int cmd;                    /* Command integer */
    int flags;                  /* Command flags */
    PyObject *args;             /* Command arguments */
    int jne;                    /* Jump offset taken on non-match */
    int je;                     /* Jump offset taken on match */
} mxTagTableEntry;
/* Values for the type of a compiled tag table. */
#define MXTAGTABLE_STRINGTYPE   0
#define MXTAGTABLE_UNICODETYPE  1
/* Instance layout of a tag table object (variable-size object). */
typedef struct {
    PyObject_VAR_HEAD
    PyObject *definition;       /* Reference to the original table
                                   definition, or NULL; needed for
                                   caching */
    int tabletype;              /* Type of compiled table:
                                     0 - 8-bit string args
                                     1 - Unicode args */
    mxTagTableEntry entry[1];   /* Variable length array of
                                   mxTagTableEntry fields, declared
                                   with one element; ob_size gives the
                                   number of allocated entries. */
} mxTagTableObject;
/* Type object of tag table objects. */
MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxTagTable_Type;

/* Exact type test: non-zero iff the type slot of the object v points
   at the mxTagTable_Type object. v must point to a valid Python
   object; it is evaluated exactly once. */
#define mxTagTable_Check(v) \
    (((PyObject *)(v))->ob_type == &mxTagTable_Type)
/* Field accessors for tag table objects. Neither macro checks the type
   of v; both expand to the struct member itself.

   NOTE(review): mxTagTable_Type(v) shares its identifier with the
   mxTagTable_Type type object declared in this header. This works
   because a function-like macro is only expanded when its name is
   followed by '(' -- any other use still refers to the type object. */
#define mxTagTable_Type(v)        (((mxTagTableObject *)(v))->tabletype)
#define mxTagTable_Definition(v)  (((mxTagTableObject *)(v))->definition)
/* The following API is exported for mxTextTools internal use only! */

/* Constructor entry point for tag table objects.

   definition   table definition object
   tabletype    int; presumably MXTAGTABLE_STRINGTYPE or
                MXTAGTABLE_UNICODETYPE -- confirm against the
                implementation
   cacheable    int; presumably controls caching of the compiled
                table -- confirm

   Returns a PyObject pointer; ownership and error conventions are
   defined by the implementation and are not visible in this header. */
extern PyObject *mxTagTable_New(PyObject *definition,
                                int tabletype,
                                int cacheable);
/* --- Tagging Engine -------------------------------------------*/

/* The following APIs are exported for mxTextTools internal use only! */

/* mxTextTools_TaggingEngine(): a table driven parser engine

   Return codes:
     rc = 2   match ok
     rc = 1   match failed
     rc = 0   error

   Caveats:
   - the types of the passed arguments are not checked!
   - the reference counts of the passed objects are not incremented!

   next is an int output pointer; presumably it receives the text
   position reached by the engine -- confirm against the implementation.
*/
extern int mxTextTools_TaggingEngine(PyObject *textobj,
                                     int text_start,
                                     int text_stop,
                                     mxTagTableObject *table,
                                     PyObject *taglist,
                                     PyObject *context,
                                     int *next);
/* mxTextTools_UnicodeTaggingEngine(): declared with the same parameter
   list and return type as mxTextTools_TaggingEngine(); presumably the
   Unicode counterpart of that engine -- confirm against the
   implementation.

   NOTE(review): unlike the other Unicode entry points in this header,
   this prototype is not wrapped in #ifdef HAVE_UNICODE. */
extern int mxTextTools_UnicodeTaggingEngine(PyObject *textobj,
                                            int text_start,
                                            int text_stop,
                                            mxTagTableObject *table,
                                            PyObject *taglist,
                                            PyObject *context,
                                            int *next);
/* Command integers for cmd; see Constants/TagTable.py for details */

/* Low-level string matching commands. They all follow the same simple
   logic:
   - match has to be a string
   - they only modify x (the current position in text)
*/
#define MATCH_ALLIN             11
#define MATCH_ALLNOTIN          12
#define MATCH_IS                13
#define MATCH_ISIN              14
#define MATCH_ISNOTIN           15

#define MATCH_WORD              21
#define MATCH_WORDSTART         22
#define MATCH_WORDEND           23

#define MATCH_ALLINSET          31
#define MATCH_ISINSET           32

#define MATCH_ALLINCHARSET      41
#define MATCH_ISINCHARSET       42

#define MATCH_MAX_LOWLEVEL      99
/* Jumps and other low-level special commands */
#define MATCH_FAIL              100
#define MATCH_JUMP              MATCH_FAIL      /* same command integer
                                                   as MATCH_FAIL */
#define MATCH_EOF               101
#define MATCH_SKIP              102
#define MATCH_MOVE              103
#define MATCH_JUMPTARGET        104

#define MATCH_MAX_SPECIALS      199
/* Higher-level string matching */
#define MATCH_SWORDSTART        211
#define MATCH_SWORDEND          212
#define MATCH_SFINDWORD         213
#define MATCH_NOWORD            MATCH_SWORDSTART  /* same command integer
                                                     as MATCH_SWORDSTART */
/* Higher-level special commands (listed in numerical order) */
#define MATCH_CALL              201
#define MATCH_CALLARG           202
#define MATCH_TABLE             203
#define MATCH_TABLEINLIST       204
#define MATCH_LOOP              205
#define MATCH_LOOPCONTROL       206
#define MATCH_SUBTABLE          207
#define MATCH_SUBTABLEINLIST    208
/* Special argument integers.

   Negative values are wrapped in parentheses so that every macro
   expands to a single primary expression and cannot be re-associated
   by operators at the point of use. */
#define MATCH_JUMP_TO            0
#define MATCH_JUMP_MATCHOK       1000000
#define MATCH_JUMP_MATCHFAIL     (-1000000)
#define MATCH_MOVE_EOF           (-1)
#define MATCH_MOVE_BOF           0
#define MATCH_FAIL_HERE          1
#define MATCH_THISTABLE          999
#define MATCH_LOOPCONTROL_BREAK  0
#define MATCH_LOOPCONTROL_RESET  (-1)
/* Flags set in cmd (>=256); one bit each, bits 8 through 12 */
#define MATCH_CALLTAG           0x0100  /* bit 8 */
#define MATCH_APPENDTAG         0x0200  /* bit 9 */
#define MATCH_APPENDTAGOBJ      0x0400  /* bit 10 */
#define MATCH_APPENDMATCH       0x0800  /* bit 11 */
#define MATCH_LOOKAHEAD         0x1000  /* bit 12 */
/* EOF */
#ifdef __cplusplus
}
#endif
#endif
/* syntax highlighted by Code2HTML, v. 0.9.1 */