#!/usr/bin/env python import sys, os, markdown, re from BeautifulSoup import BeautifulSoup def _split_lines(s): return re.findall(r'([^\n]*\n?)', s) class Writer: def __init__(self): self.started = False self.indent = 0 self.last_wrote = '\n' def _write(self, s): if s: self.last_wrote = s sys.stdout.write(s) def writeln(self, s): if s: self.linebreak() self._write('%s\n' % s) def write(self, s): if s: self.para() for line in _split_lines(s): if line.startswith('.'): self._write('\\&' + line) else: self._write(line) def linebreak(self): if not self.last_wrote.endswith('\n'): self._write('\n') def para(self, bullet=None): if not self.started: if not bullet: bullet = ' ' if not self.indent: self.writeln(_macro('.PP')) else: assert(self.indent >= 2) prefix = ' '*(self.indent-2) + bullet + ' ' self.writeln('.IP "%s" %d' % (prefix, self.indent)) self.started = True def end_para(self): self.linebreak() self.started = False def start_bullet(self): self.indent += 3 self.para(bullet='\\[bu]') def end_bullet(self): self.indent -= 3 self.end_para() w = Writer() def _macro(name, *args): if not name.startswith('.'): raise ValueError('macro names must start with "."') fixargs = [] for i in args: i = str(i) i = i.replace('\\', '') i = i.replace('"', "'") if (' ' in i) or not i: i = '"%s"' % i fixargs.append(i) return ' '.join([name] + list(fixargs)) def macro(name, *args): w.writeln(_macro(name, *args)) def _force_string(owner, tag): if tag.string: return tag.string else: out = '' for i in tag: if not (i.string or i.name in ['a', 'br']): raise ValueError('"%s" tags must contain only strings: ' 'got %r: %r' % (owner.name, tag.name, tag)) out += _force_string(owner, i) return out def _clean(s): s = s.replace('\\', '\\\\') return s def _bitlist(tag): if getattr(tag, 'contents', None) == None: for i in _split_lines(str(tag)): yield None,_clean(i) else: for e in tag: name = getattr(e, 'name', None) if name in ['a', 'br']: name = None # just treat as simple text s = _force_string(tag, e) if name: yield name,_clean(s) else: for i in _split_lines(s): yield None,_clean(i) def _bitlist_simple(tag): for typ,text in _bitlist(tag): if typ and not typ in ['em', 'strong', 'code']: raise ValueError('unexpected tag %r inside %r' % (typ, tag.name)) yield text def _text(bitlist): out = '' for typ,text in bitlist: if not typ: out += text elif typ == 'em': out += '\\fI%s\\fR' % text elif typ in ['strong', 'code']: out += '\\fB%s\\fR' % text else: raise ValueError('unexpected tag %r inside %r' % (typ, tag.name)) out = out.strip() out = re.sub(re.compile(r'^\s+', re.M), '', out) return out def text(tag): w.write(_text(_bitlist(tag))) # This is needed because .BI (and .BR, .RB, etc) are weird little state # machines that alternate between two fonts. So if someone says something # like foochickenwickendicken we have to convert that to # .BI foo chickenwicken dicken def _boldline(l): out = [''] last_bold = False for typ,text in l: nonzero = not not typ if nonzero != last_bold: last_bold = not last_bold out.append('') out[-1] += re.sub(r'\s+', ' ', text) macro('.BI', *out) def do_definition(tag): w.end_para() macro('.TP') w.started = True split = 0 pre = [] post = [] for typ,text in _bitlist(tag): if split: post.append((typ,text)) elif text.lstrip().startswith(': '): split = 1 post.append((typ,text.lstrip()[2:].lstrip())) else: pre.append((typ,text)) _boldline(pre) w.write(_text(post)) def do_list(tag): for i in tag: name = getattr(i, 'name', '').lower() if not name and not str(i).strip(): pass elif name != 'li': raise ValueError('only
  • is allowed inside