PKp'7i4}u  pygments/scanner.py# -*- coding: utf-8 -*- """ pygments.scanner ~~~~~~~~~~~~~~~~ This library implements a regex based scanner. Some languages like Pascal are easy to parse but have some keywords that depend on the context. Because of this it's impossible to lex that just by using a regular expression lexer like the `RegexLexer`. Have a look at the `DelphiLexer` to get an idea of how to use this scanner. :copyright: 2006-2007 by Armin Ronacher. :license: BSD, see LICENSE for more details. """ import re class EndOfText(RuntimeError): """ Raise if end of text is reached and the user tried to call a match function. """ class Scanner(object): """ Simple scanner All method patterns are regular expression strings (not compiled expressions!) """ def __init__(self, text, flags=0): """ :param text: The text which should be scanned :param flags: default regular expression flags """ self.data = text self.data_length = len(text) self.start_pos = 0 self.pos = 0 self.flags = flags self.last = None self.match = None self._re_cache = {} def eos(self): """`True` if the scanner reached the end of text.""" return self.pos >= self.data_length eos = property(eos, eos.__doc__) def check(self, pattern): """ Apply `pattern` on the current position and return the match object. (Doesn't touch pos). Use this for lookahead. """ if self.eos: raise EndOfText() if pattern not in self._re_cache: self._re_cache[pattern] = re.compile(pattern, self.flags) return self._re_cache[pattern].match(self.data, self.pos) def test(self, pattern): """Apply a pattern on the current position and check if it patches. Doesn't touch pos.""" return self.check(pattern) is not None def scan(self, pattern): """ Scan the text for the given pattern and update pos/match and related fields. The return value is a boolen that indicates if the pattern matched. The matched value is stored on the instance as ``match``, the last value is stored as ``last``. ``start_pos`` is the position of the pointer before the pattern was matched, ``pos`` is the end position. """ if self.eos: raise EndOfText() if pattern not in self._re_cache: self._re_cache[pattern] = re.compile(pattern, self.flags) self.last = self.match m = self._re_cache[pattern].match(self.data, self.pos) if m is None: return False self.start_pos = m.start() self.pos = m.end() self.match = m.group() return True def get_char(self): """Scan exactly one char.""" self.scan('.') def __repr__(self): return '<%s %d/%d>' % ( self.__class__.__name__, self.pos, self.data_length ) PKQh<7-G[bXbXpygments/lexer.py# -*- coding: utf-8 -*- """ pygments.lexer ~~~~~~~~~~~~~~ Base lexer classes. :copyright: 2006-2007 by Georg Brandl. :license: BSD, see LICENSE for more details. """ import re try: set except NameError: from sets import Set as set from pygments.filter import apply_filters, Filter from pygments.filters import get_filter_by_name from pygments.token import Error, Text, Other, _TokenType from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \ make_analysator __all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer', 'LexerContext', 'include', 'flags', 'bygroups', 'using', 'this'] _default_analyse = staticmethod(lambda x: 0.0) class LexerMeta(type): """ This metaclass automagically converts ``analyse_text`` methods into static methods which always return float values. """ def __new__(cls, name, bases, d): if 'analyse_text' in d: d['analyse_text'] = make_analysator(d['analyse_text']) return type.__new__(cls, name, bases, d) class Lexer(object): """ Lexer for a specific language. Basic options recognized: ``stripnl`` Strip leading and trailing newlines from the input (default: True). ``stripall`` Strip all leading and trailing whitespace from the input (default: False). ``tabsize`` If given and greater than 0, expand tabs in the input (default: 0). ``encoding`` If given, must be an encoding name. This encoding will be used to convert the input string to Unicode, if it is not already a Unicode string (default: ``'latin1'``). Can also be ``'guess'`` to use a simple UTF-8 / Latin1 detection, or ``'chardet'`` to use the chardet library, if it is installed. """ #: Name of the lexer name = None #: Shortcuts for the lexer aliases = [] #: fn match rules filenames = [] #: fn alias filenames alias_filenames = [] #: mime types mimetypes = [] __metaclass__ = LexerMeta def __init__(self, **options): self.options = options self.stripnl = get_bool_opt(options, 'stripnl', True) self.stripall = get_bool_opt(options, 'stripall', False) self.tabsize = get_int_opt(options, 'tabsize', 0) self.encoding = options.get('encoding', 'latin1') # self.encoding = options.get('inencoding', None) or self.encoding self.filters = [] for filter_ in get_list_opt(options, 'filters', ()): self.add_filter(filter_) def __repr__(self): if self.options: return '' % (self.__class__.__name__, self.options) else: return '' % self.__class__.__name__ def add_filter(self, filter_, **options): """ Add a new stream filter to this lexer. """ if not isinstance(filter_, Filter): filter_ = get_filter_by_name(filter_, **options) self.filters.append(filter_) def analyse_text(text): """ Has to return a float between ``0`` and ``1`` that indicates if a lexer wants to highlight this text. Used by ``guess_lexer``. If this method returns ``0`` it won't highlight it in any case, if it returns ``1`` highlighting with this lexer is guaranteed. The `LexerMeta` metaclass automatically wraps this function so that it works like a static method (no ``self`` or ``cls`` parameter) and the return value is automatically converted to `float`. If the return value is an object that is boolean `False` it's the same as if the return values was ``0.0``. """ def get_tokens(self, text, unfiltered=False): """ Return an iterable of (tokentype, value) pairs generated from `text`. If `unfiltered` is set to `True`, the filtering mechanism is bypassed even if filters are defined. Also preprocess the text, i.e. expand tabs and strip it if wanted and applies registered filters. """ if isinstance(text, unicode): text = u'\n'.join(text.splitlines()) else: text = '\n'.join(text.splitlines()) if self.encoding == 'guess': try: text = text.decode('utf-8') if text.startswith(u'\ufeff'): text = text[len(u'\ufeff'):] except UnicodeDecodeError: text = text.decode('latin1') elif self.encoding == 'chardet': try: import chardet except ImportError: raise ImportError('To enable chardet encoding guessing, ' 'please install the chardet library ' 'from http://chardet.feedparser.org/') enc = chardet.detect(text) text = text.decode(enc['encoding']) else: text = text.decode(self.encoding) if self.stripall: text = text.strip() elif self.stripnl: text = text.strip('\n') if self.tabsize > 0: text = text.expandtabs(self.tabsize) if not text.endswith('\n'): text += '\n' def streamer(): for i, t, v in self.get_tokens_unprocessed(text): yield t, v stream = streamer() if not unfiltered: stream = apply_filters(stream, self.filters, self) return stream def get_tokens_unprocessed(self, text): """ Return an iterable of (tokentype, value) pairs. In subclasses, implement this method as a generator to maximize effectiveness. """ raise NotImplementedError class DelegatingLexer(Lexer): """ This lexer takes two lexer as arguments. A root lexer and a language lexer. First everything is scanned using the language lexer, afterwards all ``Other`` tokens are lexed using the root lexer. The lexers from the ``template`` lexer package use this base lexer. """ def __init__(self, _root_lexer, _language_lexer, _needle=Other, **options): self.root_lexer = _root_lexer(**options) self.language_lexer = _language_lexer(**options) self.needle = _needle Lexer.__init__(self, **options) def get_tokens_unprocessed(self, text): buffered = '' insertions = [] lng_buffer = [] for i, t, v in self.language_lexer.get_tokens_unprocessed(text): if t is self.needle: if lng_buffer: insertions.append((len(buffered), lng_buffer)) lng_buffer = [] buffered += v else: lng_buffer.append((i, t, v)) if lng_buffer: insertions.append((len(buffered), lng_buffer)) return do_insertions(insertions, self.root_lexer.get_tokens_unprocessed(buffered)) #------------------------------------------------------------------------------- # RegexLexer and ExtendedRegexLexer # class include(str): """ Indicates that a state should include rules from another state. """ pass class combined(tuple): """ Indicates a state combined from multiple states. """ def __new__(cls, *args): return tuple.__new__(cls, args) def __init__(self, *args): # tuple.__init__ doesn't do anything pass class _PseudoMatch(object): """ A pseudo match object constructed from a string. """ def __init__(self, start, text): self._text = text self._start = start def start(self, arg=None): return self._start def end(self, arg=None): return self._start + len(self._text) def group(self, arg=None): if arg: raise IndexError('No such group') return self._text def groups(self): return (self._text,) def groupdict(self): return {} def bygroups(*args): """ Callback that yields multiple actions for each group in the match. """ def callback(lexer, match, ctx=None): for i, action in enumerate(args): if action is None: continue elif type(action) is _TokenType: data = match.group(i + 1) if data: yield match.start(i + 1), action, data else: if ctx: ctx.pos = match.start(i + 1) for item in action(lexer, _PseudoMatch(match.start(i + 1), match.group(i + 1)), ctx): if item: yield item if ctx: ctx.pos = match.end() return callback class _This(object): """ Special singleton used for indicating the caller class. Used by ``using``. """ this = _This() def using(_other, **kwargs): """ Callback that processes the match with a different lexer. The keyword arguments are forwarded to the lexer, except `state` which is handled separately. `state` specifies the state that the new lexer will start in, and can be an enumerable such as ('root', 'inline', 'string') or a simple string which is assumed to be on top of the root state. Note: For that to work, `_other` must not be an `ExtendedRegexLexer`. """ gt_kwargs = {} if 'state' in kwargs: s = kwargs.pop('state') if isinstance(s, (list, tuple)): gt_kwargs['stack'] = s else: gt_kwargs['stack'] = ('root', s) if _other is this: def callback(lexer, match, ctx=None): # if keyword arguments are given the callback # function has to create a new lexer instance if kwargs: # XXX: cache that somehow kwargs.update(lexer.options) lx = lexer.__class__(**kwargs) else: lx = lexer s = match.start() for i, t, v in lx.get_tokens_unprocessed(match.group(), **gt_kwargs): yield i + s, t, v if ctx: ctx.pos = match.end() else: def callback(lexer, match, ctx=None): # XXX: cache that somehow kwargs.update(lexer.options) lx = _other(**kwargs) s = match.start() for i, t, v in lx.get_tokens_unprocessed(match.group(), **gt_kwargs): yield i + s, t, v if ctx: ctx.pos = match.end() return callback class RegexLexerMeta(LexerMeta): """ Metaclass for RegexLexer, creates the self._tokens attribute from self.tokens on the first instantiation. """ def _process_state(cls, unprocessed, processed, state): assert type(state) is str, "wrong state name %r" % state assert state[0] != '#', "invalid state name %r" % state if state in processed: return processed[state] tokens = processed[state] = [] rflags = cls.flags for tdef in unprocessed[state]: if isinstance(tdef, include): # it's a state reference assert tdef != state, "circular state reference %r" % state tokens.extend(cls._process_state(unprocessed, processed, str(tdef))) continue assert type(tdef) is tuple, "wrong rule def %r" % tdef try: rex = re.compile(tdef[0], rflags).match except Exception, err: raise ValueError("uncompilable regex %r in state %r of %r: %s" % (tdef[0], state, cls, err)) assert type(tdef[1]) is _TokenType or callable(tdef[1]), \ 'token type must be simple type or callable, not %r' % (tdef[1],) if len(tdef) == 2: new_state = None else: tdef2 = tdef[2] if isinstance(tdef2, str): # an existing state if tdef2 == '#pop': new_state = -1 elif tdef2 in unprocessed: new_state = (tdef2,) elif tdef2 == '#push': new_state = tdef2 elif tdef2[:5] == '#pop:': new_state = -int(tdef2[5:]) else: assert False, 'unknown new state %r' % tdef2 elif isinstance(tdef2, combined): # combine a new state from existing ones new_state = '_tmp_%d' % cls._tmpname cls._tmpname += 1 itokens = [] for istate in tdef2: assert istate != state, 'circular state ref %r' % istate itokens.extend(cls._process_state(unprocessed, processed, istate)) processed[new_state] = itokens new_state = (new_state,) elif isinstance(tdef2, tuple): # push more than one state for state in tdef2: assert (state in unprocessed or state in ('#pop', '#push')), \ 'unknown new state ' + state new_state = tdef2 else: assert False, 'unknown new state def %r' % tdef2 tokens.append((rex, tdef[1], new_state)) return tokens def process_tokendef(cls, name, tokendefs=None): processed = cls._all_tokens[name] = {} tokendefs = tokendefs or cls.tokens[name] for state in tokendefs.keys(): cls._process_state(tokendefs, processed, state) return processed def __call__(cls, *args, **kwds): if not hasattr(cls, '_tokens'): cls._all_tokens = {} cls._tmpname = 0 if hasattr(cls, 'token_variants') and cls.token_variants: # don't process yet pass else: cls._tokens = cls.process_tokendef('', cls.tokens) return type.__call__(cls, *args, **kwds) class RegexLexer(Lexer): """ Base for simple stateful regular expression-based lexers. Simplifies the lexing process so that you need only provide a list of states and regular expressions. """ __metaclass__ = RegexLexerMeta #: Flags for compiling the regular expressions. #: Defaults to MULTILINE. flags = re.MULTILINE #: Dict of ``{'state': [(regex, tokentype, new_state), ...], ...}`` #: #: The initial state is 'root'. #: ``new_state`` can be omitted to signify no state transition. #: If it is a string, the state is pushed on the stack and changed. #: If it is a tuple of strings, all states are pushed on the stack and #: the current state will be the topmost. #: It can also be ``combined('state1', 'state2', ...)`` #: to signify a new, anonymous state combined from the rules of two #: or more existing ones. #: Furthermore, it can be '#pop' to signify going back one step in #: the state stack, or '#push' to push the current state on the stack #: again. #: #: The tuple can also be replaced with ``include('state')``, in which #: case the rules from the state named by the string are included in the #: current one. tokens = {} def get_tokens_unprocessed(self, text, stack=('root',)): """ Split ``text`` into (tokentype, text) pairs. ``stack`` is the inital stack (default: ``['root']``) """ pos = 0 tokendefs = self._tokens statestack = list(stack) statetokens = tokendefs[statestack[-1]] while 1: for rexmatch, action, new_state in statetokens: m = rexmatch(text, pos) if m: # print rex.pattern if type(action) is _TokenType: yield pos, action, m.group() else: for item in action(self, m): yield item pos = m.end() if new_state is not None: # state transition if isinstance(new_state, tuple): for state in new_state: if state == '#pop': statestack.pop() elif state == '#push': statestack.append(statestack[-1]) else: statestack.append(state) elif isinstance(new_state, int): # pop del statestack[new_state:] elif new_state == '#push': statestack.append(statestack[-1]) else: assert False, "wrong state def: %r" % new_state statetokens = tokendefs[statestack[-1]] break else: try: if text[pos] == '\n': # at EOL, reset state to "root" pos += 1 statestack = ['root'] statetokens = tokendefs['root'] yield pos, Text, u'\n' continue yield pos, Error, text[pos] pos += 1 except IndexError: break class LexerContext(object): """ A helper object that holds lexer position data. """ def __init__(self, text, pos, stack=None, end=None): self.text = text self.pos = pos self.end = end or len(text) # end=0 not supported ;-) self.stack = stack or ['root'] def __repr__(self): return 'LexerContext(%r, %r, %r)' % ( self.text, self.pos, self.stack) class ExtendedRegexLexer(RegexLexer): """ A RegexLexer that uses a context object to store its state. """ def get_tokens_unprocessed(self, text=None, context=None): """ Split ``text`` into (tokentype, text) pairs. If ``context`` is given, use this lexer context instead. """ tokendefs = self._tokens if not context: ctx = LexerContext(text, 0) statetokens = tokendefs['root'] else: ctx = context statetokens = tokendefs[ctx.stack[-1]] text = ctx.text while 1: for rexmatch, action, new_state in statetokens: m = rexmatch(text, ctx.pos, ctx.end) if m: if type(action) is _TokenType: yield ctx.pos, action, m.group() ctx.pos = m.end() else: for item in action(self, m, ctx): yield item if not new_state: # altered the state stack? statetokens = tokendefs[ctx.stack[-1]] # CAUTION: callback must set ctx.pos! if new_state is not None: # state transition if isinstance(new_state, tuple): ctx.stack.extend(new_state) elif isinstance(new_state, int): # pop del ctx.stack[new_state:] elif new_state == '#push': ctx.stack.append(ctx.stack[-1]) else: assert False, "wrong state def: %r" % new_state statetokens = tokendefs[ctx.stack[-1]] break else: try: if ctx.pos >= ctx.end: break if text[ctx.pos] == '\n': # at EOL, reset state to "root" ctx.pos += 1 ctx.stack = ['root'] statetokens = tokendefs['root'] yield ctx.pos, Text, u'\n' continue yield ctx.pos, Error, text[ctx.pos] ctx.pos += 1 except IndexError: break def do_insertions(insertions, tokens): """ Helper for lexers which must combine the results of several sublexers. ``insertions`` is a list of ``(index, itokens)`` pairs. Each ``itokens`` iterable should be inserted at position ``index`` into the token stream given by the ``tokens`` argument. The result is a combined token stream. TODO: clean up the code here. """ insertions = iter(insertions) try: index, itokens = insertions.next() except StopIteration: # no insertions for item in tokens: yield item return realpos = None insleft = True # iterate over the token stream where we want to insert # the tokens from the insertion list. for i, t, v in tokens: # first iteration. store the postition of first item if realpos is None: realpos = i oldi = 0 while insleft and i + len(v) >= index: tmpval = v[oldi:index - i] yield realpos, t, tmpval realpos += len(tmpval) for it_index, it_token, it_value in itokens: yield realpos, it_token, it_value realpos += len(it_value) oldi = index - i try: index, itokens = insertions.next() except StopIteration: insleft = False break # not strictly necessary yield realpos, t, v[oldi:] realpos += len(v) - oldi # leftover tokens if insleft: # no normal tokens, set realpos to zero realpos = realpos or 0 for p, t, v in itokens: yield realpos, t, v realpos += len(v) PK48Bc==pygments/token.pyc; ԑFc@sdZyeWn ej odklZnXdefdYZeZeiZei Z ei Z ei Z ei Z ei Z eiZeiZeiZeiZeiZeiZeiZee_ee_ee_dZdZhed<ed<e d<e d<e d <e d <e id <e id <e id <e id<e id<e d<e id<e id<e iid<e id<e id<e id<e i d<e i!d<e i"d<e i#d<e i$d<e i%d<e i d<e i&d<e i'd<e i'id <e i'i(d!<e i'i)d"<ed#<ei*d$<ed%<ei+d&<ei,d'<ei-d(<ei.d)<ei/d*<ei0d+<ei1d,<ei d-<ei2d.<ei3d/<ei4d0<ed1<ei5d2<ei6d3<ei7d4<ei7i8d5<ei9d6<ed7<ei:d8<ed9<ed:<ei;d;<ei<d<<ei3d=<ei=d><ed?<ei>d@<ei?dA<ei dB<ei@dC<eiAdD<eiBdE<eiCdF<eiDdG<eiEdH<eiFdIW|SdS(s} Convert a string into a token type:: >>> string_to_token('String.Double') Token.Literal.String.Double >>> string_to_token('Token.Literal.Number') Token.Literal.Number >>> string_to_token('') Token Tokens that are already tokens are returned unchanged: >>> string_to_token(String) Token.Literal.String s.N(s isinstancesss _TokenTypesTokensnodessplitsitemsgetattr(sssnodesitem((s2build/bdist.darwin-8.0.1-x86/egg/pygments/token.pysstring_to_tokentype]ssswserrsxskskcskdskpskrsktsnsnasnbsbpsncsnosndsnisnesnfspysnlsnnsnxsntsnvsvcsvgsvislsldssssbsscssdss2ssesshssissxssrss1ssssmsmfsmhsmisilsmososowspscscmscpsc1scssgsgdsgesgrsghsgisgosgpsgssgusgts__main__Ns---sOkay!is%r has more than one key: %r(Ts__doc__ssets NameErrorssetssSetstuples _TokenTypesTokensTexts WhitespacesErrorsOthersKeywordsNamesLiteralsStringsNumbers PunctuationsOperatorsCommentsGenericsis_token_subtypesstring_to_tokentypesConstants DeclarationsPseudosReservedsTypes AttributesBuiltinsClasss DecoratorsEntitys ExceptionsFunctionsPropertysLabels NamespacesTagsVariablesGlobalsInstancesDatesBackticksCharsDocsDoublesEscapesHeredocsInterpolsRegexsSinglesSymbolsFloatsHexsIntegersLongsOctsWords MultilinesPreprocsSpecialsDeletedsEmphsHeadingsInsertedsOutputsPromptsStrongs Subheadings TracebacksSTANDARD_TYPESs__name__ssysscopysstpsts iteritemssksvs setdefaultsappendslensexit(ssetsSTANDARD_TYPESs WhitespacesGenerics PunctuationsLiteralsOthersNamesStringsis_token_subtypesstring_to_tokentypesKeywordssyssTokensErrorsComments _TokenTypesTextsksNumbersstpstsvsOperator((s2build/bdist.darwin-8.0.1-x86/egg/pygments/token.pys? sV&                  *T      PKN7LF pygments/__init__.py# -*- coding: utf-8 -*- """ Pygments ~~~~~~~~ Pygments is a syntax highlighting package written in Python. It is a generic syntax highlighter for general use in all kinds of software such as forum systems, wikis or other applications that need to prettify source code. Highlights are: * a wide range of common languages and markup formats is supported * special attention is paid to details, increasing quality by a fair amount * support for new languages and formats are added easily * a number of output formats, presently HTML, LaTeX, RTF, SVG and ANSI sequences * it is usable as a command-line tool and as a library * ... and it highlights even Brainfuck! :copyright: 2006-2007 by Georg Brandl, Armin Ronacher and others. :license: BSD, see LICENSE for more details. """ __version__ = '0.9' __author__ = 'Georg Brandl ' __url__ = 'http://pygments.org/' __license__ = 'BSD License' __docformat__ = 'restructuredtext' __all__ = ['lex', 'format', 'highlight'] import sys, os from StringIO import StringIO from cStringIO import StringIO as CStringIO def lex(code, lexer): """ Lex ``code`` with ``lexer`` and return an iterable of tokens. """ try: return lexer.get_tokens(code) except TypeError, err: if isinstance(err.args[0], str) and \ 'unbound method get_tokens' in err.args[0]: raise TypeError('lex() argument must be a lexer instance, ' 'not a class') raise def format(tokens, formatter, outfile=None): """ Format a tokenlist ``tokens`` with the formatter ``formatter``. If ``outfile`` is given and a valid file object (an object with a ``write`` method), the result will be written to it, otherwise it is returned as a string. """ try: if not outfile: # if we want Unicode output, we have to use Python StringIO realoutfile = formatter.encoding and CStringIO() or StringIO() formatter.format(tokens, realoutfile) return realoutfile.getvalue() else: formatter.format(tokens, outfile) except TypeError, err: if isinstance(err.args[0], str) and \ 'unbound method format' in err.args[0]: raise TypeError('format() argument must be a formatter instance, ' 'not a class') raise def highlight(code, lexer, formatter, outfile=None): """ Lex ``code`` with ``lexer`` and format it with the formatter ``formatter``. If ``filters`` are given they will be applied on the token stream. If ``outfile`` is given and a valid file object (an object with a ``write`` method), the result will be written to it, otherwise it is returned as a string. """ return format(lex(code, lexer), formatter, outfile) if __name__ == '__main__': from pygments.cmdline import main sys.exit(main(sys.argv)) PK48~^``pygments/style.pyc; ӑFc@sIdZdklZlZdefdYZdefdYZdS(s pygments.style ~~~~~~~~~~~~~~ Basic style object. :copyright: 2006-2007 by Georg Brandl. :license: BSD, see LICENSE for more details. (sTokensSTANDARD_TYPESs StyleMetacBs>tZdZdZdZdZdZdZRS(Nc Cs}ti||||}x/tD]'}||i jod|i |>> string_to_token('String.Double') Token.Literal.String.Double >>> string_to_token('Token.Literal.Number') Token.Literal.Number >>> string_to_token('') Token Tokens that are already tokens are returned unchanged: >>> string_to_token(String) Token.Literal.String """ if isinstance(s, _TokenType): return s if not s: return Token node = Token for item in s.split('.'): node = getattr(node, item) return node # Map standard token types to short names, used in CSS class naming. # If you add a new item, please be sure to run this file to perform # a consistency check for duplicate values. STANDARD_TYPES = { Token: '', Text: '', Whitespace: 'w', Error: 'err', Other: 'x', Keyword: 'k', Keyword.Constant: 'kc', Keyword.Declaration: 'kd', Keyword.Pseudo: 'kp', Keyword.Reserved: 'kr', Keyword.Type: 'kt', Name: 'n', Name.Attribute: 'na', Name.Builtin: 'nb', Name.Builtin.Pseudo: 'bp', Name.Class: 'nc', Name.Constant: 'no', Name.Decorator: 'nd', Name.Entity: 'ni', Name.Exception: 'ne', Name.Function: 'nf', Name.Property: 'py', Name.Label: 'nl', Name.Namespace: 'nn', Name.Other: 'nx', Name.Tag: 'nt', Name.Variable: 'nv', Name.Variable.Class: 'vc', Name.Variable.Global: 'vg', Name.Variable.Instance: 'vi', Literal: 'l', Literal.Date: 'ld', String: 's', String.Backtick: 'sb', String.Char: 'sc', String.Doc: 'sd', String.Double: 's2', String.Escape: 'se', String.Heredoc: 'sh', String.Interpol: 'si', String.Other: 'sx', String.Regex: 'sr', String.Single: 's1', String.Symbol: 'ss', Number: 'm', Number.Float: 'mf', Number.Hex: 'mh', Number.Integer: 'mi', Number.Integer.Long: 'il', Number.Oct: 'mo', Operator: 'o', Operator.Word: 'ow', Punctuation: 'p', Comment: 'c', Comment.Multiline: 'cm', Comment.Preproc: 'cp', Comment.Single: 'c1', Comment.Special: 'cs', Generic: 'g', Generic.Deleted: 'gd', Generic.Emph: 'ge', Generic.Error: 'gr', Generic.Heading: 'gh', Generic.Inserted: 'gi', Generic.Output: 'go', Generic.Prompt: 'gp', Generic.Strong: 'gs', Generic.Subheading: 'gu', Generic.Traceback: 'gt', } if __name__ == '__main__': import sys # sanity check for token name dict: no duplicate entries! stp = STANDARD_TYPES.copy() stp[Token] = '---' # Token and Text do conflict, that is okay t = {} for k, v in stp.iteritems(): t.setdefault(v, []).append(k) if len(t) == len(stp): print 'Okay!' sys.exit() for k, v in t.iteritems(): if len(v) > 1: print "%r has more than one key: %r" % (k, v) PK48;~  pygments/console.pyc; ӑFc@sndZdZhZded)?\s* ]*> ''') tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?(?uism)') class ClassNotFound(ValueError): """ If one of the get_*_by_* functions didn't find a matching class. """ class OptionError(Exception): pass def get_choice_opt(options, optname, allowed, default=None): string = options.get(optname, default) if string not in allowed: raise OptionError('Value for option %s must be one of %s' % (optname, ', '.join(map(str, allowed)))) return string def get_bool_opt(options, optname, default=None): string = options.get(optname, default) if isinstance(string, bool): return string elif isinstance(string, int): return bool(string) elif not isinstance(string, basestring): raise OptionError('Invalid type %r for option %s; use ' '1/0, yes/no, true/false, on/off' % ( string, optname)) elif string.lower() in ('1', 'yes', 'true', 'on'): return True elif string.lower() in ('0', 'no', 'false', 'off'): return False else: raise OptionError('Invalid value %r for option %s; use ' '1/0, yes/no, true/false, on/off' % ( string, optname)) def get_int_opt(options, optname, default=None): string = options.get(optname, default) try: return int(string) except TypeError: raise OptionError('Invalid type %r for option %s; you ' 'must give an integer value' % ( string, optname)) except ValueError: raise OptionError('Invalid value %r for option %s; you ' 'must give an integer value' % ( string, optname)) def get_list_opt(options, optname, default=None): val = options.get(optname, default) if isinstance(val, basestring): return val.split() elif isinstance(val, (list, tuple)): return list(val) else: raise OptionError('Invalid type %r for option %s; you ' 'must give a list value' % ( val, optname)) def docstring_headline(obj): if not obj.__doc__: return '' res = [] for line in obj.__doc__.strip().splitlines(): if line.strip(): res.append(" " + line.strip()) else: break return ''.join(res).lstrip() def make_analysator(f): """ Return a static text analysation function that returns float values. """ def text_analyse(text): rv = f(text) if not rv: return 0.0 return min(1.0, max(0.0, float(rv))) text_analyse.__doc__ = f.__doc__ return staticmethod(text_analyse) def shebang_matches(text, regex): """ Check if the given regular expression matches the last part of the shebang if one exists. >>> from pygments.util import shebang_matches >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?') True >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?') True >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?') False >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?') False >>> shebang_matches('#!/usr/bin/startsomethingwith python', ... r'python(2\.\d)?') True It also checks for common windows executable file extensions:: >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?') True Parameters (``'-f'`` or ``'--foo'`` are ignored so ``'perl'`` does the same as ``'perl -e'``) Note that this method automatically searches the whole string (eg: the regular expression is wrapped in ``'^$'``) """ index = text.find('\n') if index >= 0: first_line = text[:index].lower() else: first_line = text.lower() if first_line.startswith('#!'): try: found = [x for x in split_path_re.split(first_line[2:].strip()) if x and not x.startswith('-')][-1] except IndexError: return False regex = re.compile('^%s(\.(exe|cmd|bat|bin))?$' % regex, re.IGNORECASE) if regex.search(found) is not None: return True return False def doctype_matches(text, regex): """ Check if the doctype matches a regular expression (if present). Note that this method only checks the first part of a DOCTYPE. eg: 'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"' """ m = doctype_lookup_re.match(text) if m is None: return False doctype = m.group(2) return re.compile(regex).match(doctype.strip()) is not None def html_doctype_matches(text): """ Check if the file looks like it has a html doctype. """ return doctype_matches(text, r'html\s+PUBLIC\s+"-//W3C//DTD X?HTML.*') def looks_like_xml(text): """ Check if a doctype exists or if we have some tags. """ m = doctype_lookup_re.match(text) if m is not None: return True return tag_re.search(text) is not None PK48|: pygments/scanner.pyc; ӑFc@s?dZdkZdefdYZdefdYZdS(s pygments.scanner ~~~~~~~~~~~~~~~~ This library implements a regex based scanner. Some languages like Pascal are easy to parse but have some keywords that depend on the context. Because of this it's impossible to lex that just by using a regular expression lexer like the `RegexLexer`. Have a look at the `DelphiLexer` to get an idea of how to use this scanner. :copyright: 2006-2007 by Armin Ronacher. :license: BSD, see LICENSE for more details. Ns EndOfTextcBstZdZRS(sZ Raise if end of text is reached and the user tried to call a match function. (s__name__s __module__s__doc__(((s4build/bdist.darwin-8.0.1-x86/egg/pygments/scanner.pys EndOfTexts sScannercBsbtZdZddZdZeeeiZdZdZdZdZ dZ RS( sp Simple scanner All method patterns are regular expression strings (not compiled expressions!) icCsR||_t||_d|_d|_||_t|_ t|_ h|_ dS(s{ :param text: The text which should be scanned :param flags: default regular expression flags iN( stextsselfsdataslens data_lengths start_posspossflagssNoneslastsmatchs _re_cache(sselfstextsflags((s4build/bdist.darwin-8.0.1-x86/egg/pygments/scanner.pys__init__$s      cCs|i|ijSdS(s.`True` if the scanner reached the end of text.N(sselfsposs data_length(sself((s4build/bdist.darwin-8.0.1-x86/egg/pygments/scanner.pyseos2scCsh|io tn||ijo ti||i|i|(sselfs __class__s__name__sposs data_length(sself((s4build/bdist.darwin-8.0.1-x86/egg/pygments/scanner.pys__repr__cs( s__name__s __module__s__doc__s__init__seosspropertyscheckstestsscansget_chars__repr__(((s4build/bdist.darwin-8.0.1-x86/egg/pygments/scanner.pysScanners      (s__doc__sres RuntimeErrors EndOfTextsobjectsScanner(sres EndOfTextsScanner((s4build/bdist.darwin-8.0.1-x86/egg/pygments/scanner.pys?s PK48K*Kt t pygments/plugin.pyc; ӑFc@sodZy dkZWnej o eZnXdZdZdZdZdZdZ dZ d Z dS( s pygments.plugin ~~~~~~~~~~~~~~~ Pygments setuptools plugin interface. The methods defined here also work if setuptools isn't installed but they just return nothing. lexer plugins:: [pygments.lexers] yourlexer = yourmodule:YourLexer formatter plugins:: [pygments.formatters] yourformatter = yourformatter:YourFormatter /.ext = yourformatter:YourFormatter As you can see, you can define extensions for the formatter with a leading slash. syntax plugins:: [pygments.styles] yourstyle = yourstyle:YourStyle filter plugin:: [pygments.filter] yourfilter = yourfilter:YourFilter :copyright: 2006-2007 by Armin Ronacher. :license: BSD, see LICENSE for more details. Nspygments.lexersspygments.formattersspygments.stylesspygments.filtersccs=ttjodSnx!titD]}|iVq%WdS(N(s pkg_resourcessNonesiter_entry_pointssLEXER_ENTRY_POINTs entrypointsload(s entrypoint((s3build/bdist.darwin-8.0.1-x86/egg/pygments/plugin.pysfind_plugin_lexers1s  ccsFttjodSnx*titD]}|i|ifVq%WdS(N(s pkg_resourcessNonesiter_entry_pointssFORMATTER_ENTRY_POINTs entrypointsnamesload(s entrypoint((s3build/bdist.darwin-8.0.1-x86/egg/pygments/plugin.pysfind_plugin_formatters8s  ccsFttjodSnx*titD]}|i|ifVq%WdS(N(s pkg_resourcessNonesiter_entry_pointssSTYLE_ENTRY_POINTs entrypointsnamesload(s entrypoint((s3build/bdist.darwin-8.0.1-x86/egg/pygments/plugin.pysfind_plugin_styles?s  ccsFttjodSnx*titD]}|i|ifVq%WdS(N(s pkg_resourcessNonesiter_entry_pointssFILTER_ENTRY_POINTs entrypointsnamesload(s entrypoint((s3build/bdist.darwin-8.0.1-x86/egg/pygments/plugin.pysfind_plugin_filtersFs  ( s__doc__s pkg_resourcess ImportErrorsNonesLEXER_ENTRY_POINTsFORMATTER_ENTRY_POINTsSTYLE_ENTRY_POINTsFILTER_ENTRY_POINTsfind_plugin_lexerssfind_plugin_formatterssfind_plugin_stylessfind_plugin_filters( sSTYLE_ENTRY_POINTsfind_plugin_filterssFILTER_ENTRY_POINTsfind_plugin_lexerss pkg_resourcessLEXER_ENTRY_POINTsfind_plugin_formatterssfind_plugin_stylessFORMATTER_ENTRY_POINT((s3build/bdist.darwin-8.0.1-x86/egg/pygments/plugin.pys?%s     PK48nۙ,,pygments/cmdline.pyc; <Gc@sdZdkZdkZdklZdklZlZlZdk l Z l Z l Z dk lZlZlZlZdklZlZlZlZlZdklZlZdklZlZd Zd Zd Z d Z!d Z"ei#dZ$dS(s pygments.cmdline ~~~~~~~~~~~~~~~~ Command line interface. :copyright: 2006-2007 by Georg Brandl. :license: BSD, see LICENSE for more details. N(sdedent(s __version__s __author__s highlight(s ClassNotFounds OptionErrorsdocstring_headline(sget_all_lexerssget_lexer_by_namesget_lexer_for_filenamesfind_lexer_class(sget_all_formatterssget_formatter_by_namesget_formatter_for_filenamesfind_formatter_classsTerminalFormatter(sget_all_filterssfind_filter_class(sget_all_stylessget_style_by_namesxUsage: %s [-l ] [-F [:]] [-f ] [-O ] [-P ] [-o ] [] %s -S

%(title)s

s5 %(title)s

%(title)s

s cBstZdZdZdgZddgZdZdZdZe dZ d Z d Z d Z d Zd ZdZdZdZdZRS(s Format tokens as HTML 4 ```` tags within a ``
`` tag, wrapped
    in a ``
`` tag. The ``
``'s CSS class can be set by the `cssclass` option. If the `linenos` option is set to ``"table"``, the ``
`` is
    additionally wrapped inside a ```` which has one row and two
    cells: one containing the line numbers and one containing the code.
    Example:

    .. sourcecode:: html

        
1
            2
def foo(bar):
              pass
            
(whitespace added to improve clarity). Wrapping can be disabled using the `nowrap` option. With the `full` option, a complete HTML 4 document is output, including the style definitions inside a ``

%(title)s

''' DOC_HEADER_EXTERNALCSS = '''\ %(title)s

%(title)s

''' DOC_FOOTER = '''\ ''' class HtmlFormatter(Formatter): r""" Format tokens as HTML 4 ```` tags within a ``
`` tag, wrapped
    in a ``
`` tag. The ``
``'s CSS class can be set by the `cssclass` option. If the `linenos` option is set to ``"table"``, the ``
`` is
    additionally wrapped inside a ```` which has one row and two
    cells: one containing the line numbers and one containing the code.
    Example:

    .. sourcecode:: html

        
1
            2
def foo(bar):
              pass
            
(whitespace added to improve clarity). Wrapping can be disabled using the `nowrap` option. With the `full` option, a complete HTML 4 document is output, including the style definitions inside a ``