/* Sketch - A Python-based interactive drawing program * Copyright (C) 1998, 1999, 2000, 2001, 2003, 2006 by Bernhard Herzog * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Functions to tokenize PostScript-files. */ #include #include #include #include "filterobj.h" #include "pschartab.c" static PyObject * Filter_Type = NULL; static Filter_Functions *filter_functions = NULL; typedef struct { PyObject_HEAD FilterObject * source; int beginning_of_line; char ai_pseudo_comments; char ai_dsc; } PSTokenizerObject; staticforward PyTypeObject PSTokenizerType; #define NAME 258 #define INT 259 #define FLOAT 260 #define STRING 261 #define MAX_DATA_TOKEN 261 #define OPERATOR 262 #define DSC_COMMENT 263 #define END 264 #define GETC() (Filter_DL_GETC((filter_functions), (self->source))) #define BACK(c) \ (filter_functions->Filter_Ungetc(((PyObject*)self->source), (c))) static int read_newline(PSTokenizerObject * self, int c) { if (c == '\r') { c = GETC(); if (c != '\n') { BACK(c); } } self->beginning_of_line = 1; return 0; } /* Return the contents of a DSC-comment as a string object. * * The stream is assumed to be positioned after the '%%' or '%!'. * Afterwards, the stream is positioned at the beginning of the next * line or EOF. The string does not contain the final newline character. */ static PyObject * read_dsc_comment(PSTokenizerObject * self) { int size; int maxsize = 256; PyObject * value; char * buf, * end; value = PyString_FromStringAndSize((char*)NULL, maxsize); if (!value) return NULL; buf = PyString_AsString(value); end = buf + maxsize; for (;;) { int c = GETC(); if (c == EOF) break; *buf++ = c; if ((char_types[c] & NEWLINE) == NEWLINE) { read_newline(self, c); buf -= 1; break; } if (buf == end) { size = maxsize; maxsize = maxsize + 1000; if (_PyString_Resize(&value, maxsize) < 0) return NULL; buf = PyString_AsString(value) + size; end = PyString_AsString(value) + maxsize; } } if (buf < end) { size = buf - PyString_AsString(value); if (_PyString_Resize(&value, size) < 0) return NULL; } return value; } static void discard_comment(PSTokenizerObject * self) { int c; for (;;) { c = GETC(); if (c == EOF) return; if (char_types[c] & NEWLINE) { read_newline(self, c); return; } } } /* Read a comment. The stream is assumed to be positioned just after the * initial '%'. If the comment is a DSC-comment (the next char is a '%' * or a '!' and the comment is at the beginning of a line), the contents * of the comment (without the leading '%%' or '%!') are returned as a * string object. * * In a normal comments discard all input until the next newline and * return NULL. * * Furthermore, if ai_pseudo_comments is true, recognize Adobe * Illustrator pseudo comments, which start with '%_' and discard just * these two characters. The rest of the comment is treated as normal * input. */ static PyObject * read_comment(PSTokenizerObject * self) { int c; PyObject * value = NULL; c = GETC(); if (self->beginning_of_line && (c == '%' || c == '!')) { value = read_dsc_comment(self); } else { if (c == '_' && self->ai_pseudo_comments) { } else if (self->beginning_of_line && c == 'A' && self->ai_dsc) { BACK(c); value = read_dsc_comment(self); } else if (c != EOF && (char_types[c] & NEWLINE)) read_newline(self, c); else discard_comment(self); } return value; } /* * Return the PostScript string literal as a Python string object. * * The stream is assumed to be positioned just after the initial '('. * Afterwards, the stream is positioned just after the closing ')'. */ static PyObject * read_string(PSTokenizerObject * self) { int depth = 0; int size; int maxsize = 256; PyObject * value; char * buf, * end; value = PyString_FromStringAndSize((char*)NULL, maxsize); if (!value) return NULL; buf = PyString_AsString(value); end = buf + maxsize; for (;;) { int c = GETC(); switch (c) { case EOF: /* end of input in string constant! */ Py_DECREF(value); PyErr_SetString(PyExc_EOFError, "unexpected end of input"); return NULL; case '(': depth += 1; *buf++ = c; break; case ')': depth -= 1; if (depth < 0) { size = buf - PyString_AsString(value); if (_PyString_Resize(&value, size) < 0) { return NULL; } return value; } else *buf++ = c; break; case '\\': c = GETC(); switch(c) { case 'b': *buf++ = '\b'; break; case 'f': *buf++ = '\f'; break; case 'n': *buf++ = '\n'; break; case 'r': *buf++ = '\r'; break; case 't': *buf++ = '\t'; break; case '\\': *buf++ = '\\'; break; case '(': *buf++ = '('; break; case ')': *buf++ = ')'; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { int code = c - '0'; c = GETC(); if ('0' <= c && c <= '7') { code = code * 8 + c - '0'; c = GETC(); if ('0' <= c && c <= '7') { code = code * 8 + c - '0'; c = GETC(); } } *buf++ = code; BACK(c); } break; case '\r': c = GETC(); if (c != '\n') BACK(c); break; case '\n': break; default: *buf++ = c; } break; case '\r': c = GETC(); if (c != '\n') BACK(c); *buf++ = '\n'; break; default: *buf++ = c; } if (buf == end) { size = maxsize; maxsize = maxsize + 1000; if (_PyString_Resize(&value, maxsize) < 0) return NULL; buf = PyString_AsString(value) + size; end = PyString_AsString(value) + maxsize; } } /* for (;;) */ /* unreachable */ return NULL; } /* * Return the PostScript hex string literal as a Python string object. * * The stream is assumed to be positioned just after the initial '<'. * Afterwards, the stream is positioned just after the closing '>'. */ static PyObject * read_hex_string(PSTokenizerObject * self) { int size; int maxsize = 256; int last_digit = -1, digit; PyObject * value; char * buf, * end; value = PyString_FromStringAndSize((char*)NULL, maxsize); if (!value) return NULL; buf = PyString_AsString(value); end = buf + maxsize; for (;;) { int c = GETC(); digit = -1; switch (c) { case EOF: /* end of input in string constant! */ Py_DECREF(value); PyErr_SetString(PyExc_EOFError, "unexpected end of input"); return NULL; case '>': size = buf - PyString_AsString(value); if (_PyString_Resize(&value, size) < 0) { return NULL; } return value; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': digit = c - '0'; break; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': digit = c - 'A' + 10; break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': digit = c - 'a' + 10; break; default: if (!(char_types[c] & WHITESPACE)) { Py_DECREF(value); PyErr_SetString(PyExc_SyntaxError, "invalid character in hex string"); return NULL; } } if (digit >= 0) { if (last_digit < 0) { last_digit = digit; } else { *buf++ = last_digit * 16 + digit; last_digit = -1; } if (buf == end) { size = maxsize; maxsize = maxsize + 1000; if (_PyString_Resize(&value, maxsize) < 0) return NULL; buf = PyString_AsString(value) + size; end = PyString_AsString(value) + maxsize; } } } /* for (;;) */ /* unreachable */ return NULL; } /* * */ static PyObject * read_name_or_number(PSTokenizerObject * self, int * token, int isname) { int size; int maxsize = 256; PyObject * value; char * buf, * end; *token = 0; value = PyString_FromStringAndSize((char*)NULL, maxsize); if (!value) return NULL; buf = PyString_AsString(value); end = buf + maxsize; for (;;) { int c = GETC(); if (c == EOF) break; if ((char_types[c] & NAMECHAR) == 0) { BACK(c); *buf = '\0'; break; } *buf++ = c; if (buf == end) { size = maxsize; maxsize = maxsize + 1000; if (_PyString_Resize(&value, maxsize) < 0) return NULL; buf = PyString_AsString(value) + size; end = PyString_AsString(value) + maxsize; } } /* check for a number */ if (!isname) { char * start = PyString_AsString(value); char * p = start; char * numend; while(char_types[(int)*p] & INTCHAR) p += 1; if (char_types[(int)*p] & FLOATCHAR) { char * old_locale; double result; /* Change LC_NUMERIC locale to "C" around the strtod * call so that it parses the number correctly. */ old_locale = strdup(setlocale(LC_NUMERIC, NULL)); setlocale(LC_NUMERIC, "C"); result = strtod(start, &numend); setlocale(LC_NUMERIC, old_locale); free(old_locale); if (numend == buf) { Py_DECREF(value); *token = FLOAT; return PyFloat_FromDouble(result); } } else { int result = strtol(start, &numend, 10); if (numend == buf) { Py_DECREF(value); *token = INT; return PyInt_FromLong(result); } } } if (buf < end) { size = buf - PyString_AsString(value); if (_PyString_Resize(&value, size) < 0) return NULL; } *token = OPERATOR; return value; } static PyObject * pslex(PSTokenizerObject * self) { int token = 0; PyObject * value = NULL; PyObject * result; int c, ctype; while (token == 0) { c = GETC(); if (c != '%') self->beginning_of_line = 0; switch (c) { case EOF: Py_INCREF(Py_None); value = Py_None; token = END; break; case '%': value = read_comment(self); if (value) token = DSC_COMMENT; break; case '[': case ']': case '{': case '}': /* a single character token */ { char buf[2] = "\000\000"; buf[0] = c; value = PyString_FromString(buf); token = OPERATOR; } break; case '(': value = read_string(self); token = STRING; break; case '<': /* this case should check the next character to also recognize the << operator and base85 encoded strings */ value = read_hex_string(self); token = STRING; break; case '/': value = read_name_or_number(self, &token, 1); token = NAME; break; default: ctype = char_types[c]; if (ctype & WHITESPACE) { while (ctype & WHITESPACE) { self->beginning_of_line = (ctype & NEWLINE) == NEWLINE; c = GETC(); if (c == EOF) break; ctype = char_types[c]; } if (c != EOF) BACK(c); } else if (ctype & NAMECHAR) { BACK(c); /* NAMECHAR includes digits */ value = read_name_or_number(self, &token, 0); } else { PyErr_Format(PyExc_IOError, "unexpected character %d (flags %.4x)", c, ctype); token = -1; } } /* switch */ } /* while token == 0 */ if (token < 0 || value == NULL) return NULL; result = Py_BuildValue("(iO)", token, value); Py_DECREF(value); return result; } /* * */ static PyObject * pstokenizer_next(PSTokenizerObject * self, PyObject * args) { return pslex(self); } /* * */ static PyObject * pstokenizer_next_dsc(PSTokenizerObject * self, PyObject * args) { PyObject * result = NULL; int c; for (;;) { c = GETC(); if (c == EOF) break; else if (char_types[c] & NEWLINE) { read_newline(self, c); } else if (c == '%') { result = read_comment(self); if (result) break; } else { self->beginning_of_line = 0; } } if (!result) { result = PyString_FromString(""); } return result; } /* * */ static PyObject * pstokenizer_read(PSTokenizerObject * self, PyObject * args) { PyObject * result = NULL; long length, read; if (!PyArg_ParseTuple(args, "l", &length)) return NULL; result = PyString_FromStringAndSize(NULL, length); if (!result) return NULL; read = filter_functions->Filter_Read((PyObject*)(self->source), PyString_AsString(result), length); if (read == 0 && PyErr_Occurred()) { Py_DECREF(result); return NULL; } if (_PyString_Resize(&result, read) < 0) return NULL; return result; } /* * */ static PyObject * PSTokenizer_FromStream(FilterObject * filter) { PSTokenizerObject * self; self = PyObject_New(PSTokenizerObject, &PSTokenizerType); if (!self) return NULL; Py_INCREF(filter); self->source = filter; self->beginning_of_line = 1; self->ai_pseudo_comments = 0; self->ai_dsc = 0; return (PyObject*)self; } static void pstokenizer_dealloc(PSTokenizerObject * self) { Py_DECREF(self->source); PyObject_Del(self); } static PyObject * pstokenizer_repr(PSTokenizerObject * self) { char buf[1000]; PyObject * streamrepr; streamrepr = PyObject_Repr((PyObject*)(self->source)); if (!streamrepr) return NULL; sprintf(buf, "", PyString_AsString(streamrepr)); Py_DECREF(streamrepr); return PyString_FromString(buf); } #define OFF(x) offsetof(PSTokenizerObject, x) static struct memberlist pstokenizer_memberlist[] = { {"source", T_OBJECT, OFF(source), RO}, {"ai_pseudo_comments", T_BYTE, OFF(ai_pseudo_comments)}, {"ai_dsc", T_BYTE, OFF(ai_dsc)}, {NULL} }; static struct PyMethodDef pstokenizer_methods[] = { {"next", (PyCFunction)pstokenizer_next, 1}, {"next_dsc", (PyCFunction)pstokenizer_next_dsc, 1}, {"read", (PyCFunction)pstokenizer_read, 1}, {NULL, NULL} }; static PyObject * pstokenizer_getattr(PyObject * self, char * name) { PyObject * result; result = Py_FindMethod(pstokenizer_methods, self, name); if (result != NULL) return result; PyErr_Clear(); return PyMember_Get((char *)self, pstokenizer_memberlist, name); } static int pstokenizer_setattr(PyObject * self, char * name, PyObject * v) { if (v == NULL) { PyErr_SetString(PyExc_AttributeError, "can't delete object attributes"); return -1; } return PyMember_Set((char *)self, pstokenizer_memberlist, name, v); } static PyTypeObject PSTokenizerType = { PyObject_HEAD_INIT(NULL) 0, "pstokenizer", sizeof(PSTokenizerObject), 0, (destructor)pstokenizer_dealloc,/*tp_dealloc*/ (printfunc)0, /*tp_print*/ pstokenizer_getattr, /*tp_getattr*/ pstokenizer_setattr, /*tp_setattr*/ (cmpfunc)0, /*tp_compare*/ (reprfunc)pstokenizer_repr, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash*/ }; /* * */ static PyObject * pstokenizer_new(PyObject * self, PyObject * args) { FilterObject * source; if (!PyArg_ParseTuple(args, "O!", Filter_Type, &source)) return NULL; return PSTokenizer_FromStream(source); } /* * Method table and module initialization */ static PyMethodDef pstokenize_functions[] = { {"PSTokenizer", pstokenizer_new, 1}, {NULL, NULL} }; static void add_int(PyObject * dict, int i, char * name) { PyObject *v; v = Py_BuildValue("i", i); if (!v) PyErr_Clear(); if (PyDict_SetItemString(dict, name, v) < 0) PyErr_Clear(); } DL_EXPORT(void) initpstokenize(void) { PyObject * d, *m, *r, *filter; PSTokenizerType.ob_type = &PyType_Type; m = Py_InitModule("pstokenize", pstokenize_functions); d = PyModule_GetDict(m); #define ADD_INT(name) add_int(d, name, #name) ADD_INT(NAME); ADD_INT(INT); ADD_INT(FLOAT); ADD_INT(STRING); ADD_INT(OPERATOR); ADD_INT(DSC_COMMENT); ADD_INT(END); ADD_INT(MAX_DATA_TOKEN); /* import some objects from filter */ filter = PyImport_ImportModule("streamfilter"); if (filter) { Filter_Type = PyObject_GetAttrString(filter, "FilterType"); if (!Filter_Type) return; r = PyObject_GetAttrString(filter, "Filter_Functions"); if (!r) return; filter_functions = (Filter_Functions*)PyCObject_AsVoidPtr(r); Py_DECREF(r); } }