#! /usr/bin/env python
#
# Syntax highlight a python source file using CSS.
# Copyright (C) 2002 Michael Urman
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
"""
Highlight a python source file using CSS and clean HTML.

The default colors are based on my vim colorscheme, which i got from
vim.sf.net, called manxome.
"""

__version__ = '0.1'
__date__ = '2002/06/06'
__author__ = 'Michael Urman (mu on irc.openprojects.net)'

import os
import re
import sys
import tokenize

# CSS rules written into the generated page, one tuple per selector:
#     (selector, background, foreground, extra)
# A value of None omits that declaration.  ``extra`` is None for every rule
# below; when set, hilite() emits it verbatim as additional declaration text.
attributes = [
    ('body', 'black', 'white', None),
    ('.cm', '#000400', '#00fc00', None),
    ('.fn', None, '#60a8d8', None),
    ('.kw', None, '#00a8a8', None),
    ('.na', None, '#88f', None),
    ('.op', None, '#ff0', None),
    ('.dl', None, '#f00', None),
    ('.as', None, 'brown', None),
    ('.st', None, '#e8ece8', None),
    ('.sc', None, '#00fcf8', None),
    ('.ex', None, '#d860a8', None),
    ('.er', '#800', '#ffa', None),
]

# Maps a tokenize token type to a coarse class name; None means the token
# is written without any markup.  getstate() refines 'na', 'op' and 'nu'.
tokenhash = {
    tokenize.NAME: 'na',
    tokenize.ENDMARKER: None,
    tokenize.NUMBER: 'nu',
    tokenize.STRING: 'st',
    tokenize.ERRORTOKEN: 'er',
    tokenize.OP: 'op',
    tokenize.NEWLINE: None,
    tokenize.INDENT: None,
    tokenize.DEDENT: None,
    tokenize.NL: None,
    tokenize.COMMENT: 'cm',
}

# String-like token types that only exist on some interpreter versions
# (BACKQUOTE on Python 2, the f-/t-string pieces on newer Python 3).
# Looking them up with getattr keeps the module importable everywhere.
for _name in ('BACKQUOTE',
              'FSTRING_START', 'FSTRING_MIDDLE', 'FSTRING_END',
              'TSTRING_START', 'TSTRING_MIDDLE', 'TSTRING_END'):
    _toktype = getattr(tokenize, _name, None)
    if _toktype is not None:
        tokenhash[_toktype] = 'st'
del _name, _toktype

# The lookup tables below are dicts mapping each entry to 1 and are only
# ever used for membership tests.
keywords = dict.fromkeys((
    'and', 'assert', 'break', 'class', 'continue', 'def', 'del', 'elif',
    'else', 'except', 'exec', 'finally', 'for', 'from', 'global', 'if',
    'in', 'is', 'import', 'lambda', 'not', 'or', 'pass', 'print', 'return',
    'raise', 'try', 'while', 'yield',
), 1)

exceptions = dict.fromkeys((
    'ArithmeticError', 'AssertionError', 'AttributeError',
    'DeprecationWarning', 'EOFError', 'EnvironmentError', 'Exception',
    'FloatingPointError', 'IOError', 'ImportError', 'IndentationError',
    'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
    'MemoryError', 'NameError', 'NotImplementedError', 'OSError',
    'OverflowError', 'OverflowWarning', 'ReferenceError', 'RuntimeError',
    'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError',
    'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'TypeError',
    'UnboundLocalError', 'UnicodeError', 'UserWarning', 'ValueError',
    'Warning', 'WindowsError', 'ZeroDivisionError',
), 1)

# Operator classes.  getstate() does not consult these at present: every
# operator is emitted unstyled.
assign = dict.fromkeys(('=', '+=', '-=', '/=', '%=', '&=', '|=', '^=',
                        '<<=', '>>=', '**=', '//='), 1)
oper = dict.fromkeys(('+', '-', '*', '/', '|', '&', '%', '~', '^',
                      '<<', '>>', '**', '//'), 1)
delim = dict.fromkeys(('(', ')', '[', ']', '{', '}'), 1)
punct = dict.fromkeys((',', ':', '.'), 1)
cmp = dict.fromkeys(('<', '>', '==', '!=', '<=', '>='), 1)

# Splits a string literal into (prefix + opening quote, quote, body); the
# closing quote must equal the opening one.  Triple quotes are listed first
# so they win over the single-character alternatives.
_STRING_PARTS = re.compile(
    r'^([A-Za-z]*(%s))(.*)\2$' % '|'.join(("'''", '"""', "'", '"')),
    re.DOTALL)


def getstate(type, token):
    """Return the CSS class for a token, or None if it gets no markup.

    type  -- token type number as produced by the tokenize module
    token -- the token's text

    Names become 'kw' (keyword), 'ex' (known exception name) or None.
    Operators and numbers are always None.  Every other type uses its
    tokenhash entry unchanged.

    Raises ValueError when ``type`` has no entry in tokenhash.
    """
    try:
        state = tokenhash[type]
    except KeyError:
        raise ValueError('Token number %s (%s) not handled' % (type, token))

    if state == 'na':
        if token in keywords:
            return 'kw'
        if token in exceptions:
            return 'ex'
        return None
    if state in ('op', 'nu'):
        return None
    return state


def ws(spos, epos):
    """Return the run of spaces that separates two adjacent tokens.

    spos -- (row, column) where the current token starts
    epos -- (row, column) where the previous token ended

    On the same row the gap is the column difference; on a different row
    the current token's start column is used.  Only spaces are produced.
    """
    if epos[0] == spos[0]:
        return ' ' * (spos[1] - epos[1])
    return ' ' * spos[1]


def _escape(text):
    """Return ``text`` with &, <, > and " replaced by HTML entities."""
    # '&' must be handled first so the entities added below stay intact.
    return (text.replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;')
                .replace('"', '&quot;'))


def _stylesheet(rules):
    """Return CSS text, one line per (selector, bg, fg, extra) rule."""
    lines = []
    for selector, background, foreground, extra in rules:
        declarations = []
        if background is not None:
            declarations.append('background: %s;' % background)
        if foreground is not None:
            declarations.append('color: %s;' % foreground)
        if extra is not None:
            declarations.append(extra)
        lines.append('%s { %s }\n' % (selector, ' '.join(declarations)))
    return ''.join(lines)


def _markup_string(token):
    """Return an escaped string literal with its body in an 'sc' span.

    The prefix and quote characters stay outside the inner span, so they
    keep the colour of the enclosing 'st' span.  Text that does not look
    like a complete literal is only escaped.
    """
    match = _STRING_PARTS.match(token)
    if match is None:
        return _escape(token)
    opening, quote, body = match.group(1, 2, 3)
    return '%s<span class="sc">%s</span>%s' % (
        _escape(opening), _escape(body), _escape(quote))


def hilite(src, dst, title='Syntax Hilight'):
    """Write a highlighted HTML rendering of Python source to ``dst``.

    src   -- object with a readline() method yielding the source text
    dst   -- object with write() and flush() methods receiving the HTML
    title -- text for the page's <title> element (escaped before use)

    The page embeds a stylesheet built from ``attributes`` and puts the
    code in a single <pre> element.  Runs of consecutive tokens that share
    a class share one <span>.

    Raises ValueError (from getstate) for an unknown token type, and lets
    any error raised by the tokenize module propagate; ``dst`` may then
    hold partial output.

    NOTE: spacing is rebuilt from token positions, so text that is not a
    token (such as a backslash line continuation) is not reproduced.
    """
    dst.write('<!DOCTYPE html>\n<html>\n<head>\n')
    dst.write('<title>%s</title>\n' % _escape(title))
    dst.write('<style type="text/css">\n%s</style>\n'
              % _stylesheet(attributes))
    dst.write('</head>\n<body><pre>')

    state = None          # class of the span currently open, if any
    nextfunc = False      # true right after a 'class' or 'def' keyword
    lpos = (0, 0)         # end position of the previous token

    for toktype, token, spos, epos, _line in \
            tokenize.generate_tokens(src.readline):
        newstate = getstate(toktype, token)

        # The plain name following 'class' or 'def' is the thing being
        # defined, so it gets the 'fn' class.
        if nextfunc:
            if newstate is None:
                newstate = 'fn'
            nextfunc = False
        if newstate == 'kw' and token in ('class', 'def'):
            nextfunc = True

        if toktype == tokenize.STRING:
            text = _markup_string(token)
        else:
            text = _escape(token)

        # Close the old span before the gap and open the new one after
        # it, so the whitespace between differently classed tokens stays
        # unstyled.
        if newstate != state and state is not None:
            dst.write('</span>')
        dst.write(ws(spos, lpos))
        if newstate != state and newstate is not None:
            dst.write('<span class="%s">' % newstate)
        dst.write(text)

        state = newstate
        lpos = epos

    # The token stream normally ends with unstyled tokens, but make sure
    # no span is left open regardless.
    if state is not None:
        dst.write('</span>')
    dst.write('</pre></body>\n</html>\n')
    dst.flush()


def main():
    """Command line entry point: hilite [SOURCE|-] [DEST|-].

    A missing argument or '-' selects stdin for the source and stdout for
    the destination.  When the source is a file, its base name becomes the
    page title.  More than two arguments prints a usage line to stderr and
    exits with status 2.
    """
    args = sys.argv[1:]
    if len(args) > 2:
        sys.stderr.write('usage: %s [SOURCE|-] [DEST|-]\n'
                         % os.path.basename(sys.argv[0]))
        sys.exit(2)

    title = 'Syntax Hilight'
    src = sys.stdin
    dst = sys.stdout
    opened = []           # files opened here, which must be closed again
    try:
        if args and args[0] != '-':
            src = open(args[0], 'r', 1)
            opened.append(src)
            title = os.path.basename(args[0])
        if len(args) == 2 and args[1] != '-':
            dst = open(args[1], 'w', 1)
            opened.append(dst)
        hilite(src, dst, title)
    finally:
        for handle in opened:
            handle.close()


if __name__ == '__main__':
    main()