#!/usr/bin/env python
# Massages text into lightweight html, needs python 2 (probably 2.2).
# Usage:
# Edit OUT_PAT according to where you want the html files created, create
# any directories needed and:
# convert.py <textfile> [... <textfile>]
# Paul Sorenson
# $Revision$
# vi: et

import sys
import cgi  # only used for escaping html reserved characters in input text
import re

class Converter:
    """Turn plain-text files into lightweight HTML pages.

    Runs of text separated by blank lines become <p> paragraphs, line
    breaks inside a run become <br>, reserved HTML characters are escaped,
    and URLs and e-mail addresses are wrapped in links.  When OUT_PAT is
    set, every converted file is also recorded so that writeIndex() can
    emit a nested list of links to the generated pages.
    """

    RE_URL = re.compile(r'(http|ftp|https)://\S+', re.IGNORECASE)
    # Applied to text that is already HTML-escaped, hence the optional
    # &lt; / &gt; wrappers around the address.
    RE_ADDR = re.compile(r'(&lt;)?(\S+@[^&\s]+)(&gt;)?', re.IGNORECASE)
    # Crack filenames: group 1 is everything up to the last '.', or the
    # whole name when there is no extension; group 2 is the extension
    # without its '.'.  The lookbehind lets group 2 match only directly
    # after a '.', and the empty alternative keeps group 2 defined (as '')
    # so that \g<2> is always usable in a replacement string.  Path
    # separators are excluded from the extension so that a dot in a
    # directory name is not mistaken for the start of one.
    RE_FILE = re.compile(r'^(.*?)\.?((?<=\.)[^./\\]*|)$')
    # Patterns used to select output filename (not thoroughly tested)
    # %b gets base part of filename (everything up to last '.' if one exists,
    # otherwise everything), same as \g<1>
    # %e gets extension (not including '.' if it exists) same as \g<2>
    # If None then use stdout
    OUT_PAT = None                     # everything goes to stdout
    #OUT_PAT = '''%b.%e.foo'''          # index.txt > index.txt.foo
    #OUT_PAT = '''otherdir/%b.html'''   # index.txt > otherdir/index.html
    #OUT_PAT = '''%b.html'''             # index.txt > index.html
    #OUT_PAT = '''tmp/%b.html'''         # index.txt > tmp/index.html

    def __init__(self):
        """Prepare the output-name template and the empty index tables."""
        # Convert the user pattern to a valid replacement string.
        # Nothing stops the user from entering \g<n> syntax directly.
        if self.OUT_PAT:
            self.OUT_SUB = self.OUT_PAT.replace('%b', r'\g<1>').replace('%e', r'\g<2>')
        else:
            self.OUT_SUB = None
        self.index = {}      # nested dicts: heading -> sub-headings
        self.fileIndex = {}  # '-'-joined heading path -> output filename

    def convert(self, filename):
        """Convert one text file to HTML on the currently selected output.

        filename -- path of the text file to read.

        Raises whatever open() raises when the input (or the output chosen
        by setOut) cannot be opened.
        """
        self.filename = filename
        self.IN_PARA = 0
        # Open the input first so that a missing input file neither creates
        # an empty output file nor adds an entry to the index.
        f = open(filename)
        try:
            self.setOut(filename)
            try:
                self.writeHeader()
                for line in f:
                    self.lineproc(line)
                if self.IN_PARA:
                    # Close the paragraph left open by the last text line.
                    self.write('</p>\n')
                    self.IN_PARA = 0
                self.writeFooter()
            finally:
                # Never close stdout; other files may still be written to it.
                if self.OUT is not sys.stdout:
                    self.OUT.close()
        finally:
            f.close()

    def lineproc(self, line):
        """Emit the HTML for a single input line.

        A blank line ends the current paragraph, if any, and is otherwise
        ignored.  A text line opens a paragraph, or continues the current
        one after a <br>, and is written escaped and with links added.
        """
        line = line.strip()
        if not line:
            if self.IN_PARA:
                self.write('</p>\n')
                self.IN_PARA = 0
            return
        if self.IN_PARA:
            self.write('<br>\n')
        else:
            self.write('<p>')
            self.IN_PARA = 1
        # Escape reserved HTML characters before adding any markup.
        line = self._escape(line)
        line = self.replaceEmailAddr(line)
        line = self.replaceUrl(line)
        self.write(line)

    def _escape(self, text):
        """Return text with &, <, > and the double quote HTML-escaped.

        Produces the same result as cgi.escape(text, 1) without depending
        on that function, which newer Python releases no longer provide.
        """
        # '&' must be replaced first so the entities added below survive.
        text = text.replace('&', '&amp;')
        text = text.replace('<', '&lt;')
        text = text.replace('>', '&gt;')
        return text.replace('"', '&quot;')

    def replaceUrl(self, line):
        """Return line with every http/ftp/https URL wrapped in a link."""
        return self.RE_URL.sub(r'<a href="\g<0>">\g<0></a>', line)

    def replaceEmailAddr(self, line):
        """Return line with every e-mail address turned into a mailto link."""
        # In real life you might want to obfuscate email addresses.
        return self.RE_ADDR.sub(r'<a href="mailto:\g<2>">\g<2></a>', line)

    def writeHeader(self):
        """Write the doctype and page head, using self.filename as title."""
        self.write('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\n\n')
        # The filename is data, not markup, so escape it for the title.
        self.write('<html><head><title>%s</title><link rel=stylesheet type="text/css" href="http://linuxmafia.com/redrick.css"></head><body><div id="content">\n' % self._escape(self.filename))

    def writeFooter(self):
        """Write the closing tags that match writeHeader()."""
        self.write('</div></body></html>\n')

    def setOut(self, filename):
        """Select self.OUT for the given input file.

        Without OUT_PAT the output is stdout.  Otherwise the output name is
        derived from filename through OUT_SUB, the file is added to the
        index, an HTML comment naming input and output is written to
        stdout, and the output file is opened for writing.
        """
        if not self.OUT_PAT:
            self.OUT = sys.stdout
            return
        outFile = self.RE_FILE.sub(self.OUT_SUB, filename)
        self.addIndex(filename, outFile)
        sys.stdout.write('<!--  %s - %s -->\n' % (filename, outFile))
        self.OUT = open(outFile, 'w')

    def addIndex(self, inFilename, outFilename):
        """Record outFilename in the index under inFilename's headings.

        The base part of inFilename is split on '-'; each piece becomes one
        nesting level in self.index.
        """
        base = self.RE_FILE.sub(r'\g<1>', inFilename)
        node = self.index
        for heading in base.split('-'):
            if heading not in node:
                node[heading] = {}
            node = node[heading]
        # Key on the full '-'-separated path (which is just the base name)
        # so that e.g. 'a-bc' and 'ab-c' stay distinct entries.
        self.fileIndex[base] = outFilename

    def writeIndex(self):
        """Write index_auto.html, a nested list linking the converted files."""
        self.filename = 'index_auto.html'
        self.OUT = open(self.filename, 'w')
        try:
            self.writeHeader()
            self.printMap(self.index, 0, '')
            self.writeFooter()
        finally:
            self.OUT.close()

    def printMap(self, map, pad, lookup):
        """Write one level of the index as a <ul>, recursing into children.

        map    -- dict of heading -> dict of sub-headings at this level.
        pad    -- nesting depth indicator; not used for the output, kept so
                  existing callers continue to work.
        lookup -- heading path prefix for this level, '' at the top and
                  otherwise ending in '-'.
        """
        keys = list(map.keys())
        keys.sort()
        self.write('<ul>\n')
        for key in keys:
            filemap = lookup + key
            label = self._escape(key)
            if filemap in self.fileIndex:
                s = self.makeUrl(self._escape(self.fileIndex[filemap]), label)
            else:
                s = label
            self.write('<li>' + s + '\n')
            if map[key]:
                # Children extend the path with the same '-' separator that
                # addIndex() split the name on.
                self.printMap(map[key], pad + 2, filemap + '-')
        self.write('</ul>\n')

    def makeUrl(self, ref, text):
        """Return an anchor element pointing at ref.

        text is used as the link label; when either ref or text is empty
        the label falls back to ref.
        """
        if not (ref and text):
            text = ref
        return '<a href="%s">%s</a>' % (ref, text)

    def write(self, text):
        """Write text to the currently selected output."""
        self.OUT.write(text)

 
def main():
    """Convert every file named on the command line, then write the index."""
    converter = Converter()
    paths = sys.argv[1:]
    for path in paths:
        converter.convert(path)
    converter.writeIndex()

# Run the conversion only when this file is executed as a script, so that
# importing it does not process sys.argv or write any files.
if __name__ == '__main__':
    main()
