#! /usr/bin/env python2.2
#
#    Syntax-highlight a Python source file using CSS.
#    Copyright (C) 2002  Michael Urman
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#

"""
Highlight a Python source file using CSS and clean HTML.

The default colors are based on my vim colorscheme, manxome, which I got
from vim.sf.net.
"""

from __future__ import generators

__version__ = '0.1'
__date__ = '2002/06/06'
__author__ = 'Michael Urman (mu on irc.openprojects.net)'

import sys, re, tokenize

# Stylesheet table consumed by hilite(), one tuple per CSS rule:
#     (selector, background-color, color, font-style)
# A None field means that property is omitted from the generated rule.
# Selectors marked "unused" are never returned by getstate() as written.
attributes = [
    ('body', 'black', 'white', None),     # page-wide default colors
    ('.cm', '#000400', '#00fc00', None),  # comments
    ('.fn', None, '#60a8d8', None),       # name following 'def' / 'class'
    ('.kw', None, '#00a8a8', None),       # keywords
    ('.na', None, '#88f', None),          # plain names (unused)
    ('.op', None, '#ff0', None),          # operators (unused)
    ('.dl', None, '#f00', None),          # delimiters (unused)
    ('.as', None, 'brown', None),         # assignment operators (unused)
    ('.st', None, '#e8ece8', None),       # string literals (prefix and quotes)
    ('.sc', None, '#00fcf8', None),       # text between a string's quotes
    ('.ex', None, '#d860a8', None),       # built-in exception names
    ('.er', '#800', '#ffa', None),        # tokenizer error tokens
]

# Maps a tokenize token type to a two-letter highlight class, or to None
# for tokens that are written out without any class of their own.
# getstate() refines or suppresses some of these classes afterwards.
tokenhash = {
    tokenize.NAME: 'na',
    tokenize.ENDMARKER: None,
    tokenize.NUMBER: 'nu',
    tokenize.STRING: 'st',
    tokenize.ERRORTOKEN: 'er',
    tokenize.OP: 'op',
    tokenize.NEWLINE: None,
    tokenize.INDENT: None,
    tokenize.DEDENT: None,
    tokenize.NL: None,
    tokenize.COMMENT: 'cm',
}
# BACKQUOTE only exists where the language still has `expr` syntax
# (Python 2); referencing it unconditionally raises AttributeError on
# Python 3 and prevents the module from loading at all.
if hasattr(tokenize, 'BACKQUOTE'):
    tokenhash[tokenize.BACKQUOTE] = 'st'

# Names that getstate() classifies as keywords ('kw').  A dict is used as
# a set: only the keys matter, every value is 1.
keywords = {}
for k in '''
        and assert break class continue def del elif else except exec
        finally for from global if in is import lambda not or pass print
        return raise try while yield
        '''.split():
    keywords[k] = 1

# Built-in exception and warning names that getstate() classifies as 'ex'.
# A dict is used as a set: only the keys matter, every value is 1.
exceptions = {}
for k in ('ArithmeticError', 'AssertionError', 'AttributeError',
        'DeprecationWarning', 'EOFError', 'EnvironmentError', 'Exception',
        'FloatingPointError', 'IOError', 'ImportError', 'IndentationError',
        'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
        'MemoryError', 'NameError', 'NotImplementedError', 'OSError',
        'OverflowError', 'OverflowWarning', 'ReferenceError', 'RuntimeError',
        'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError',
        'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'TypeError',
        'UnboundLocalError', 'UnicodeError', 'UserWarning', 'ValueError',
        'Warning', 'WindowsError', 'ZeroDivisionError'):
    exceptions[k] = 1

# Assignment operators: plain '=' plus the augmented form of every binary
# operator.  A dict is used as a set: only the keys matter.
assign = {}
for k in ('=', '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '<<=', '>>=',
        '**=', '//='):
    assign[k] = 1

# Arithmetic, bitwise and shift operators, stored as the keys of a dict
# that serves as a set (every value is 1).
oper = {}
for k in '+ - * / | & % ~ ^ << >> ** //'.split():
    oper[k] = 1

# Bracketing characters, stored as the keys of a dict that serves as a set.
delim = {}
for k in '()[]{}':
    delim[k] = 1

# Punctuation characters, stored as the keys of a dict that serves as a set.
punct = {}
for k in ',:.':
    punct[k] = 1

# Comparison operators, stored as the keys of a dict that serves as a set.
# NOTE: on Python 2 this module-level name hides the builtin cmp() function
# inside this module.
cmp = {}
for k in '< > == != <= >='.split():
    cmp[k] = 1

def getstate(type, token):
    """Return the highlight class for one token, or None for no highlight.

    type  -- token type number as produced by the tokenize module
    token -- the token's text

    The base class comes from the module-level ``tokenhash`` table and is
    then narrowed: a name becomes 'kw' if it is listed in ``keywords``,
    'ex' if it is listed in ``exceptions`` and is otherwise unhighlighted;
    operators and numbers are never highlighted.

    Raises ValueError when ``tokenhash`` has no entry for the token type.
    """
    if type not in tokenhash:
        raise ValueError('Token number %s (%s) not handled' % (type, token))

    state = tokenhash[type]

    if state == 'na':
        # Only special names get a class of their own.
        if token in keywords:
            return 'kw'
        if token in exceptions:
            return 'ex'
        return None

    # Operators and numbers are deliberately left in the default color.
    # (The delim / oper / assign tables could be consulted here to split
    # operators into the 'dl', 'op' and 'as' classes.)
    if state in ('op', 'nu'):
        return None

    return state

def ws(spos, epos):
    """Return the spaces to emit in front of a token.

    spos -- (row, column) where the token starts
    epos -- (row, column) where the previously written token ended

    On the same row the result fills the gap between the two columns; on a
    different row it is the token's full starting column.
    """
    width = spos[1]
    if spos[0] == epos[0]:
        # Same row: only the distance from the previous token is missing.
        width = width - epos[1]
    return ' ' * width

def _html_escape(text):
    """Return text with the characters &, <, > and " replaced by entities."""
    # '&' has to be handled first, otherwise the ampersands of the entities
    # inserted afterwards would themselves be escaped.
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    return text.replace('"', '&quot;')


def hilite(src, dst, title='Syntax Hilight'):
    """Write the Python source read from src to dst as a highlighted HTML page.

    src   -- object whose readline method supplies the source text
    dst   -- object with write and flush methods that receives the HTML
    title -- text for the page's <title> element; HTML-escaped before use

    The page holds a stylesheet generated from the module-level
    ``attributes`` table, followed by a <pre> block in which every run of
    consecutive tokens sharing a highlight class (see getstate) is wrapped
    in one <span class="...">.  The name following 'def' or 'class' gets
    class 'fn', and the text between a string literal's quotes is wrapped
    in an additional <span class="sc">.

    Raises ValueError (via getstate) for a token type that has no entry in
    ``tokenhash``.
    """
    dst.write('''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
    "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>\n''')
    # The title usually is a file name, so it may contain markup characters.
    # Formatting with %s first keeps non-string titles working as before.
    dst.write('        <title>%s</title>\n' % _html_escape('%s' % (title,)))
    dst.write('        <style type="text/css"><!--\n')

    for sel, bg, fg, style in attributes:
        dst.write('%s {' % sel)
        if bg is not None: dst.write(' background-color: %s;' % bg)
        if fg is not None: dst.write(' color: %s;' % fg)
        if style is not None: dst.write(' font-style: %s;' % style)
        dst.write(' }\n')

    dst.write('''        --></style>
    </head>
    <body>
<pre>''')

    # Splits an already-escaped string literal into prefix + opening quote
    # (group 1, the quote alone being group 2), contents (group 3) and the
    # matching closing quote.  Any run of prefix letters is accepted so
    # that two-letter and upper-case prefixes (ur'', R"") are handled too.
    string_parts = re.compile(
        r"^([A-Za-z]*(&quot;&quot;&quot;|&quot;|'''|'))(.*)\2$", re.DOTALL)

    state = None        # class of the <span> currently open, None if none
    nextfunc = 0        # true right after a 'def' or 'class' keyword
    lpos = (0, 0)       # end position of the token written last
    for toktype, token, spos, epos, _line in tokenize.generate_tokens(src.readline):
        newstate = getstate(toktype, token)
        if nextfunc:
            # The token after 'def' / 'class' is the name being defined.
            if newstate is None:
                newstate = 'fn'
            nextfunc = 0
        if newstate == 'kw' and token in ('class', 'def'):
            nextfunc = 1

        token = _html_escape(token)
        if newstate == 'st':
            token = string_parts.sub(r'\1<span class="sc">\3</span>\2', token)

        # Close / open spans only where the class changes; the whitespace
        # separating the tokens goes between the closing and opening tag.
        changed = newstate != state
        if changed and state is not None:
            dst.write('</span>')
        dst.write(ws(spos, lpos))
        if changed and newstate is not None:
            dst.write('<span class="%s">' % newstate)
        dst.write(token)

        state = newstate
        lpos = epos

    # Never leave a span open, whatever the last token happened to be.
    if state is not None:
        dst.write('</span>')

    dst.write('''    </pre></body>
</html>\n''')
    dst.flush()


def main():
    """Command-line entry point: [SOURCE|-] [DESTINATION|-].

    With no SOURCE, or with '-', the source is read from standard input;
    with no DESTINATION, or with '-', the HTML goes to standard output.
    When SOURCE names a file, its base name becomes the page title.
    More than two arguments print a usage message to standard error and
    exit with status 2.
    """
    import os

    args = sys.argv[1:]
    if len(args) > 2:
        # Previously this fell through with src/dst unbound (NameError).
        prog = os.path.basename(sys.argv[0])
        sys.stderr.write('usage: %s [source|-] [destination|-]\n' % prog)
        sys.exit(2)

    title = 'Syntax Hilight'
    src = sys.stdin
    dst = sys.stdout
    opened = []     # only files opened here get closed, never stdin/stdout
    try:
        if args and args[0] != '-':
            src = open(args[0], 'r', 1)
            opened.append(src)
            title = os.path.basename(args[0])
        if len(args) == 2 and args[1] != '-':
            dst = open(args[1], 'w', 1)
            opened.append(dst)

        hilite(src, dst, title)
    finally:
        for f in opened:
            f.close()

if __name__ == '__main__':
    main()