#! /usr/bin/env python2.2
#
# Syntax Hilight a python source file using CSS.
# Copyright (C) 2002 Michael Urman
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
"""
Highlight a python source file using CSS and clean HTML.

The default colors are based on my vim colorscheme, which I got from
vim.sf.net, called manxome.
"""

# A future statement may only be preceded by the module docstring and
# comments, so it has to come before the metadata assignments below.
from __future__ import generators

__version__ = '0.1'
__date__ = '2002/06/06'
__author__ = 'Michael Urman (mu on irc.openprojects.net)'

import re
import sys
import tokenize

# One entry per CSS rule: (selector, background, foreground, font-style).
# A value of None leaves the corresponding property out of the rule.
attributes = [
    ('body', 'black', 'white', None),
    ('.cm', '#000400', '#00fc00', None),
    ('.fn', None, '#60a8d8', None),
    ('.kw', None, '#00a8a8', None),
    ('.na', None, '#88f', None),
    ('.op', None, '#ff0', None),
    ('.dl', None, '#f00', None),
    ('.as', None, 'brown', None),
    ('.st', None, '#e8ece8', None),
    ('.sc', None, '#00fcf8', None),
    ('.ex', None, '#d860a8', None),
    ('.er', '#800', '#ffa', None),
]

# Maps a tokenize token type to a preliminary state name; getstate() refines
# 'na', 'op' and 'nu' further.  None means "no highlighting".
tokenhash = {
    tokenize.NAME: 'na',
    tokenize.ENDMARKER: None,
    tokenize.NUMBER: 'nu',
    tokenize.STRING: 'st',
    tokenize.ERRORTOKEN: 'er',
    tokenize.OP: 'op',
    tokenize.NEWLINE: None,
    tokenize.INDENT: None,
    tokenize.DEDENT: None,
    tokenize.NL: None,
    tokenize.COMMENT: 'cm',
}

# Token types that only exist on some interpreter versions.  BACKQUOTE is
# gone from Python 3; the FSTRING_* types are only present on interpreters
# that tokenize f-strings piecewise.  Registering them is best-effort:
# without it getstate() would reject every source file containing one.
for _name in ('BACKQUOTE', 'FSTRING_START', 'FSTRING_MIDDLE', 'FSTRING_END'):
    if hasattr(tokenize, _name):
        tokenhash[getattr(tokenize, _name)] = 'st'


def _lookup(names):
    """Return a dict mapping every entry of *names* to 1, for ``in`` tests."""
    table = {}
    for name in names:
        table[name] = 1
    return table


keywords = _lookup((
    'and', 'assert', 'break', 'class', 'continue', 'def', 'del', 'elif',
    'else', 'except', 'exec', 'finally', 'for', 'from', 'global', 'if',
    'in', 'is', 'import', 'lambda', 'not', 'or', 'pass', 'print', 'return',
    'raise', 'try', 'while', 'yield',
))

exceptions = _lookup((
    'ArithmeticError', 'AssertionError', 'AttributeError',
    'DeprecationWarning', 'EOFError', 'EnvironmentError', 'Exception',
    'FloatingPointError', 'IOError', 'ImportError', 'IndentationError',
    'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
    'MemoryError', 'NameError', 'NotImplementedError', 'OSError',
    'OverflowError', 'OverflowWarning', 'ReferenceError', 'RuntimeError',
    'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError',
    'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'TypeError',
    'UnboundLocalError', 'UnicodeError', 'UserWarning', 'ValueError',
    'Warning', 'WindowsError', 'ZeroDivisionError',
))

# The operator tables below are not consulted by getstate() at present
# (all operators are left unstyled); they are kept as module-level data.
assign = _lookup(('=', '+=', '-=', '/=', '%=', '&=', '|=', '^=', '<<=',
                  '>>=', '**=', '//='))
oper = _lookup(('+', '-', '*', '/', '|', '&', '%', '~', '^', '<<', '>>',
                '**', '//'))
delim = _lookup(('(', ')', '[', ']', '{', '}'))
punct = _lookup((',', ':', '.'))
# NOTE: this name hides the Python 2 builtin cmp(); it is kept because it is
# part of the module's existing namespace.
cmp = _lookup(('<', '>', '==', '!=', '<=', '>='))

# Splits a string literal into (prefix + opening quote, quote, body).  The
# triple quotes are listed first so they win over the single-character ones.
_STRING_RE = re.compile(
    r'^([A-Za-z]*("""|"|%s))(.*)\2\Z' % "'''|'", re.DOTALL)


def getstate(type, token):
    """Return the CSS class for a token, or None if it is not highlighted.

    type  -- token type number as produced by the tokenize module
    token -- the token's text

    Names are classified as 'kw' (keyword), 'ex' (known exception name) or
    None; operators and numbers are always None; every other type uses the
    state registered in ``tokenhash``.

    Raises ValueError when the token type is not present in ``tokenhash``.
    """
    try:
        state = tokenhash[type]
    except KeyError:
        raise ValueError('Token number %s (%s) not handled' % (type, token))
    if state == 'na':
        if token in keywords:
            return 'kw'
        if token in exceptions:
            return 'ex'
        return None
    if state in ('op', 'nu'):
        return None
    return state


def ws(spos, epos):
    """Return the run of spaces that separates two consecutive tokens.

    spos -- (row, column) where the current token starts
    epos -- (row, column) where the previous token ended

    On the same row this is the column difference; on a new row it is the
    current token's column, i.e. its indentation.
    """
    if epos[0] == spos[0]:
        return ' ' * (spos[1] - epos[1])
    return ' ' * spos[1]


def _escape(text):
    """Return *text* with the HTML special characters & < > " escaped."""
    # The ampersand must be replaced first so that the entities produced by
    # the later replacements are not escaped a second time.
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    text = text.replace('"', '&quot;')
    return text


def _markup_string(token):
    """Return escaped HTML for a string literal, body wrapped in class 'sc'.

    The literal is split before escaping so that the delimiter match does
    not depend on what the escaped form looks like.  Text that does not look
    like a complete literal is simply escaped.
    """
    match = _STRING_RE.match(token)
    if match is None:
        return _escape(token)
    return '%s<span class="sc">%s</span>%s' % (
        _escape(match.group(1)), _escape(match.group(3)),
        _escape(match.group(2)))


def _gap(spos, lpos, line_ended, toktype):
    """Return the text to emit between the previous token and this one.

    spos       -- (row, column) where the current token starts
    lpos       -- (row, column) where the previous token ended
    line_ended -- true if the previous token terminated its line
    toktype    -- token type of the current token

    Normally this is plain whitespace from ws().  When the row advances
    although the previous token did not end its line, the rows were joined
    by a backslash continuation, for which tokenize produces no token; it
    is re-emitted here (with a single space before the backslash) so the
    output keeps the source's line structure.
    """
    skipped = spos[0] - lpos[0]
    if (skipped > 0 and not line_ended
            and toktype not in (tokenize.ENDMARKER, tokenize.DEDENT)):
        return ' \\\n' * skipped + ' ' * spos[1]
    return ws(spos, lpos)


def hilite(src, dst, title='Syntax Hilight'):
    """Write an HTML page showing the Python source in *src* highlighted.

    src   -- object with a readline() method yielding the source text
    dst   -- object with write() and flush() methods receiving the HTML
    title -- text for the page's <title> element (escaped before use)

    Raises ValueError (from getstate) on a token type that is not present
    in ``tokenhash``; errors raised by tokenize propagate unchanged.
    """
    dst.write('''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
    "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>\n''')
    dst.write('  <title>%s</title>\n' % _escape(title))
    dst.write('  <style type="text/css"><!--\n')
    for sel, bg, fg, style in attributes:
        dst.write('%s {' % sel)
        if bg is not None:
            dst.write(' background-color: %s;' % bg)
        if fg is not None:
            dst.write(' color: %s;' % fg)
        if style is not None:
            dst.write(' font-style: %s;' % style)
        dst.write(' }\n')
    dst.write('''  --></style>
</head>
<body>
<pre>''')

    state = None        # CSS class of the span currently open, if any
    nextfunc = 0        # set right after 'class'/'def': next name is 'fn'
    lpos = (0, 0)       # end position of the previous token
    line_ended = 1      # did the previous token terminate its line?
    line_enders = (tokenize.NEWLINE, tokenize.NL)
    for toktype, token, spos, epos, line in \
            tokenize.generate_tokens(src.readline):
        newstate = getstate(toktype, token)
        if nextfunc:
            # Only a plain name directly after 'class'/'def' is the name
            # being defined.
            if newstate is None and toktype == tokenize.NAME:
                newstate = 'fn'
            nextfunc = 0
        if newstate == 'kw' and token in ('class', 'def'):
            nextfunc = 1

        if toktype == tokenize.STRING:
            text = _markup_string(token)
        else:
            text = _escape(token)
        gap = _gap(spos, lpos, line_ended, toktype)

        if newstate == state:
            # Same class as before: stay inside the open span (if any).
            dst.write(gap)
        else:
            # Whitespace between differently styled tokens is written
            # outside of both spans.
            if state is not None:
                dst.write('</span>')
            dst.write(gap)
            if newstate is not None:
                dst.write('<span class="%s">' % newstate)
        dst.write(text)

        state = newstate
        lpos = epos
        # Some tokenize versions fold the newline into the token text
        # instead of emitting a separate NL token, so check the text too.
        line_ended = toktype in line_enders or token[-1:] in ('\n', '\r')

    if state is not None:
        # Never leave a span open, whatever the last token was.
        dst.write('</span>')
    dst.write('''
</pre></body>
</html>\n''')
    dst.flush()


def main():
    """Command line entry point: hilite [infile|- [outfile|-]].

    A missing argument or '-' selects stdin / stdout.  When an input file
    is named, its base name becomes the page title.  More than two
    arguments print a usage message to stderr and exit with status 2.
    """
    import os
    args = sys.argv[1:]
    if len(args) > 2:
        sys.stderr.write('usage: %s [infile|- [outfile|-]]\n'
                         % os.path.basename(sys.argv[0]))
        sys.exit(2)

    title = 'Syntax Hilight'
    src = sys.stdin
    dst = sys.stdout
    opened = []     # files opened here, so they (and only they) get closed
    try:
        if len(args) >= 1 and args[0] != '-':
            src = open(args[0], 'r', 1)
            opened.append(src)
            title = os.path.basename(args[0])
        if len(args) == 2 and args[1] != '-':
            dst = open(args[1], 'w', 1)
            opened.append(dst)
        hilite(src, dst, title)
    finally:
        for handle in opened:
            handle.close()


if __name__ == '__main__':
    main()