This repository has been archived on 2023-10-17. You can view files and clone it, but cannot push or open issues or pull requests.
dotfiles/.vim/doc/db2vim/db2vim

716 lines
21 KiB
Python

#!/usr/bin/env python2
r"""
db2vim [options] file.xml
SHORT OPTIONS
-d Prints some debugging information on stderr.
-s If given, the db2vim operates in a 'stict' conversion mode, i.e, any
element which does not have a handler defined for them it be
completeley ignored including all its children. Otherwise, db2vim will
recurse into an unknown tag and process any of its children it
recognizes. Since db2vim always recognizes text nodes, not using this
option has the effect that all text will be printed out, even if
somewhat incorrectly.
LONG OPTIONS
--prefix=<prefix>
This is a string like "ls_" which will be prepended to the section
numbers. Default to 'ls_' if unsupplied.
"""
import xml.dom.minidom
import getopt
import string
import re
import sys
# Okay. so I import *. Shoot me.
from textutils import *
from domutils import *
# define a bunch of constants for formatting.
TEXT_WIDTH = 80
BLOCK_QUOTE = 4
COL_SPACE = 2
# a bunch of globals used in creating the Table of contents.
#
# TOC_HASH['section 1.1 label'] = 'ls_1_1'
#
# LEVEL_HASH['section 1.1 label'] = 1
# (top level article has level 0)
#
# TITLE_HASH['section 1.1 label'] = 'Title of section 1.1'
#
# FILENAME = the name of the file being processed with the last extension
# changed to .txt
#
# TOC_PREFIX = 'ls_' (the prefix used to create the section labels).
TOC_HASH = {}
LEVEL_HASH = {}
TITLE_HASH = {}
FILENAME = ''
TOC_PREFIX = ''
ANCHOR_HASH = {}
URL_HASH = {}
# STDERR for printing debugging info.
DEBUG = 0
STDERR = sys.stderr
STRICT = 0
NUM_ANCHORS = {0: 1}
###############################################################################
# Miscellaneous utility functions
###############################################################################
def encodeTo52(num):
if num < 26:
return unichr(ord('a') + num)
elif num < 52:
return unichr(ord('A') + num - 26)
else:
return encodeTo52(int(num / 52)) + encodeTo52(num % 52)
def makeTocHash(rootElement, width, prefix='', level=0):
lastLabelUsed = 0
for section in rootElement.getChildrenByTagName('section'):
title = section.getChildrenByTagName('title')[0]
titleText = handleElement(title, width)
lastLabelUsed += 1
thisLabel = TOC_PREFIX + prefix + str(lastLabelUsed)
sectionid = section.getAttribute('id')
if not sectionid:
section.setAttribute('id', thisLabel)
sectionid = thisLabel
NUM_ANCHORS[0] += 1
ANCHOR_HASH[sectionid] = TOC_PREFIX + 'a_' + encodeTo52(
NUM_ANCHORS[0] + 52)
TOC_HASH[sectionid] = thisLabel
LEVEL_HASH[sectionid] = level
TITLE_HASH[sectionid] = titleText
if section.getChildrenByTagName('section'):
makeTocHash(section, width - 5, prefix=prefix +
str(lastLabelUsed) + '_', level=level + 1)
def makeAnchorHash(rootElement):
anchors = rootElement.getElementsByTagName(
'anchor') + rootElement.getElementsByTagName('note')
for anchor in anchors:
if not anchor.getAttribute('id'):
continue
NUM_ANCHORS[0] += 1
if anchor.getAttribute('id') in ANCHOR_HASH or \
anchor.getAttribute('id') in TOC_HASH:
sys.stderr.write("Warning: anchor [%s] multiply defined\n" %
anchor.getAttribute('id'))
ANCHOR_HASH[anchor.getAttribute(
'id')] = TOC_PREFIX + 'a_' + encodeTo52(NUM_ANCHORS[0] + 52)
def makeURLHash(rootElement):
urls = rootElement.getElementsByTagName('ulink')
numURLs = 0
for url in urls:
if not url.getAttribute('url') or url.getAttribute('url') in URL_HASH:
continue
numURLs += 1
URL_HASH[url.getAttribute('url')] = TOC_PREFIX + 'u_' + str(numURLs)
def makeTOC(node, width, maxlevel=1):
retText = ""
for section in node.getChildrenByTagName('section'):
sectionid = section.getAttribute('id')
thisLabel = TOC_HASH.get(sectionid, '')
titleText = TITLE_HASH.get(sectionid, '')
level = LEVEL_HASH.get(sectionid, 10)
if level <= maxlevel:
retText += '|' + thisLabel + '| ' + titleText + '\n'
if level < maxlevel and section.getChildrenByTagName('section'):
childText = makeTOC(section, width - 5)
retText += VertCatString(" ", 4, childText) + '\n'
retText = re.sub(r'\s+$', r'\n', retText)
return retText
###############################################################################
# Generalized function for handling dom elements.
###############################################################################
def IsInlineTag(self):
if self.nodeType == self.TEXT_NODE:
return 1
elif inlineTags.get(self.tagName, 0):
return 1
else:
return 0
def getChildrenByTagName(self, name):
"""
extension to the xml.dom.minidom.Element class. returns all direct
descendants of this Element.
"""
nodeList = []
child = self.firstChild
while not child is None:
if child.nodeType == child.ELEMENT_NODE and child.nodeName == name:
nodeList.append(child)
child = child.nextSibling
return nodeList
xml.dom.minidom.Element.getChildrenByTagName = getChildrenByTagName
def handleElement(rootElement, width=TEXT_WIDTH):
"""
Generalized function to handle an Element node in a DOM tree.
"""
retText = ""
child = rootElement.firstChild
while not child is None:
printerr('node type = %d' % child.nodeType)
if child.nodeType == child.ELEMENT_NODE:
printerr('processing [%s]' % child.tagName)
isinline = IsInlineTag(child)
# if the child is an Element and if a handler exists, then call it.
if not isinline \
and child.nodeType == child.ELEMENT_NODE \
and child.tagName in handlerMaps:
# offset the child text by the current indentation value
printerr('making recursive call to known child.')
retText += handlerMaps[child.tagName](child, width)
child = child.nextSibling
elif not isinline \
and child.nodeType == child.PROCESSING_INSTRUCTION_NODE \
and child.target == 'vimhelp':
if child.data in handlerMaps:
retText += handlerMaps[child.data](child, width)
child = child.nextSibling
# if its a text node or an inline element node, collect consecutive
# text nodes into a single paragraph and indent it.
elif isinline:
text = ""
while not child is None and IsInlineTag(child):
if child.nodeType == child.TEXT_NODE:
text += child.data
elif child.nodeType == child.ELEMENT_NODE:
if child.tagName in handlerMaps:
text += handlerMaps[child.tagName](child, width)
else:
text += GetText(child.childNodes)
child = child.nextSibling
retText += IndentParagraphs(text, width)
# If we cannot understand _anything_ about the element, then just
# handle its children hoping we have something to gather from
# there.
elif not STRICT:
printerr('making recursive call for unkown child')
retText += handleElement(child, width)
child = child.nextSibling
else:
child = child.nextSibling
return retText
###############################################################################
# Functions for handling various xml tags
###############################################################################
def handleArticleInfo(articleinfo, width):
makeTocHash(articleinfo.parentNode, width)
makeAnchorHash(articleinfo.parentNode)
makeURLHash(articleinfo.parentNode)
title = articleinfo.getChildrenByTagName('title')
if title is None:
print("Article should have a title!")
sys.exit(1)
name = GetText(title[0].childNodes)
authors = articleinfo.getChildrenByTagName('author')
authorText = ''
for author in authors:
firstname = ''
surname = ''
if author.getElementsByTagName('firstname'):
firstname = GetTextFromElementNode(author, 'firstname')[0]
if author.getChildrenByTagName('surname'):
surname = GetTextFromElementNode(author, 'surname')[0]
if author.getElementsByTagName('email'):
email = GetTextFromElementNode(author, 'email')[0]
authorText = authorText + firstname + ' ' + surname + \
' <' + email + '>\n'
abstractText = ''
abstract = articleinfo.getChildrenByTagName('abstract')
if abstract is not None:
abstractText = '\n\n' + CenterText('Abstract\n========', width)
abstractText += handleElement(abstract[0], width) + '\n'
retText = CenterText(name + '\n*' + FILENAME + '*\n' + authorText, width)
retText += abstractText
toc = makeTOC(articleinfo.parentNode, width)
return retText + '\n' + RightJustify('*' + FILENAME + '-toc*', width) + \
'\n' + toc
def handleOption(option, width):
retText = ""
names = GetTextFromElementNode(option, "name")
for name in names:
retText += string.rjust("*" + name + "*", width) + "\n"
nameTexts = ""
maxNameLen = -1
for name in names:
maxNameLen = max(maxNameLen, len(name + " "))
nameTexts += name + " \n"
desc = option.getChildrenByTagName("desc")[0]
descText = handleElement(desc, width=width - maxNameLen)
retText += VertCatString(nameTexts + " ", None, descText)
return retText + "\n"
def handleOptionDefault(default, width):
type = string.join(GetTextFromElementNode(default, "type"), "\n")
extra = string.join(GetTextFromElementNode(default, "extra"), "\n")
return type + "\t(" + extra + ")"
def handleTableRoot(root, width):
tgroup = root.getChildrenByTagName('tgroup')[0]
if tgroup is None:
return ''
rows = []
numHeadRows = 0
if tgroup.getChildrenByTagName('thead'):
thead = tgroup.getChildrenByTagName('thead')[0]
rows = thead.getChildrenByTagName('row')
numHeadRows = len(rows)
tbody = tgroup.getChildrenByTagName('tbody')[0]
rows += tbody.getChildrenByTagName('row')
widths, text = calculateColumnWidthsDoublePass(rows, width)
headText = text[0:numHeadRows]
bodyText = text[numHeadRows:]
headTable = FormatTable(headText, ROW_SPACE=1, COL_SPACE=
COL_SPACE, justify=0, widths=widths)
if headTable:
headTable = re.sub(r'\n|$', '\g<0>~', headTable)
bodyTable = FormatTable(bodyText, ROW_SPACE=1, COL_SPACE=
COL_SPACE, justify=0, widths=widths)
return headTable + '\n' + re.sub(r'\n+$', '', bodyTable) + '\n\n'
def calculateColumnWidths(rows, alloc_widths):
widths = {}
text = []
for row in rows:
cols = row.getChildrenByTagName("entry")
if len(alloc_widths) == 1:
alloc_widths *= len(cols)
colwidths = []
rowtext = []
for col, width in zip(cols, alloc_widths):
coltext = handleElement(col, width)
rowtext.append(coltext)
# This is the 'width' of the current cell including the
# whitespace padding.
colwidths.append(max(map(len, coltext.split("\n")))
+ COL_SPACE)
text.append(rowtext)
# update the widths of the columns by finding the maximum
# width of all cells in this column.
for i in range(len(colwidths)):
widths[i] = max(colwidths[i], widths.get(i, -1))
return widths, text
def calculateColumnWidthsDoublePass(rows, width):
maxwidths, text = calculateColumnWidths(rows, [width])
if reduce(lambda x, y: x + y, maxwidths.values()) <= width:
return maxwidths, text
# now find out how many columns exceed the maximum permitted width.
# nlarge: number of columns which are too wide.
# remainingWidth: width which these large columns can share.
nlarge = 0
remainingWidth = width
for colwidth in maxwidths.values():
if colwidth > width / len(maxwidths):
nlarge += 1
else:
remainingWidth += -colwidth
# newmaxwidth: width which each of the large columns is allowed.
newmaxwidth = remainingWidth / max(nlarge, 1)
newcolwidths = []
for colwidth in maxwidths.values():
newcolwidths += [min(colwidth, newmaxwidth)]
# make another run and this time ask each cell to restrict itself to
# newmaxwidth as calculated above.
newmaxwidth, newtext = calculateColumnWidths(rows, newcolwidths)
return newmaxwidth, newtext
def handleCode(code, width):
retText = GetText(code.childNodes)
return " &codebegin;\n" + VertCatString(" ", 4, retText) + "&codeend;"
def handleList(list, width, marker=0):
if list.tagName == 'simplelist':
child = 'member'
decoration = ''
elif list.tagName == 'orderedlist':
child = 'listitem'
else:
child = 'member'
decoration = '- '
retText = ""
items = list.getChildrenByTagName(child)
i = 1
for item in items:
if list.tagName == 'orderedlist':
decoration = str(i) + '. '
i = i + 1
itemText = handleElement(item, width - len(decoration))
itemText = VertCatString(decoration, None, itemText)
retText += '\n' + re.sub(r'\s+$', '', itemText) + "\n"
return retText
def handleNote(note, width):
title = None
if note.getChildrenByTagName('title'):
title = note.getChildrenByTagName('title')[0]
name = GetText(title.childNodes)
note.removeChild(title)
noteid = ''
if note.getAttribute('id'):
noteTagText = '*' + note.getAttribute('id') + '* '
noteTagText += '*' + ANCHOR_HASH[note.getAttribute('id')] + '*'
noteTagText = IndentParagraphs(noteTagText, width / 2)
noteid = RightJustify(noteTagText, width) + '\n'
noteText = handleElement(note, width - len("NOTE: "))
if title is not None:
noteText = name + '\n' + ('-' * len(name)) + '\n' + noteText
noteText = noteid + VertCatString("NOTE: ", None, noteText)
return noteText + "\n"
def handleParagraph(paragraph, width):
partext = handleElement(paragraph, width)
partext = re.sub(r'\n+$', '', partext)
partext = re.sub(r'^\n+', '', partext)
return partext + "\n\n"
def handleFormalParagraph(formalparagraph, width):
title = None
if formalparagraph.getChildrenByTagName('title'):
title = formalparagraph.getChildrenByTagName('title')[0]
name = GetText(title.childNodes)
formalparagraph.removeChild(title)
partext = handleElement(formalparagraph, width)
partext = re.sub(r'\n+$', '', partext)
partext = re.sub(r'^\n+', '', partext)
if title is not None:
partext = name + '\n' + ('-' * len(name)) + '\n' + partext
return partext + "\n\n"
def handleBlockQuote(block, width):
text = handleElement(block, width - BLOCK_QUOTE)
text = VertCatString(" " * BLOCK_QUOTE,
BLOCK_QUOTE, text)
return text + "\n"
def handleLink(link, width):
linkend = link.getAttribute('linkend')
if linkend not in ANCHOR_HASH:
print >> STDERR, "Warning: Link ID [%s] not found in TOC" % linkend
text = handleElement(link, width)
anchorpt = ANCHOR_HASH.get(linkend)
if not anchorpt:
anchorpt = ''
return text + ' [|' + anchorpt + '|]'
def handleAnchor(anchor, width):
anchorText = '*' + anchor.getAttribute('id') + '* '
anchorText += '*' + ANCHOR_HASH[anchor.getAttribute('id')] + '*'
return RightJustify(anchorText, width) + "\n"
def handleSection(section, width):
title = section.getChildrenByTagName('title')[0]
name = handleElement(title, width)
sectionid = section.getAttribute('id')
tagsformatted = ''
if sectionid in TOC_HASH:
tagsformatted = '*%s* ' % TOC_HASH[sectionid]
if sectionid in ANCHOR_HASH:
tagsformatted += '*%s* ' % ANCHOR_HASH[sectionid]
if sectionid and sectionid in TOC_HASH and \
sectionid != TOC_HASH[sectionid]:
tagsformatted += '*%s*' % sectionid
# try to indent to a width of 20
tagsformatted = RightJustify(IndentParagraphs(tagsformatted, 30), 0)
tagswidth = TextWidth(tagsformatted)
# width(name) + nspaces + width(tags) = 80
if len(tagsformatted) > 2:
header = VertCatString(name, 80 - tagswidth, tagsformatted)
else:
header = name
section.removeChild(title)
text = handleElement(section, width)
thislevel = LEVEL_HASH.get(sectionid, -1)
if thislevel == 0:
delim = '='
newlines = '\n\n'
elif thislevel == 1:
delim = '-'
newlines = '\n'
else:
delim = ''
newlines = '\n'
thisTOC = ''
if thislevel <= 1:
thisTOC = makeTOC(section, width, maxlevel=1)
return "\n" + (delim * TEXT_WIDTH) + \
"\n" + header + newlines + thisTOC + newlines + re.sub(
r'\n+$', '', text) + "\n"
def handleUlink(ulink, width):
url = ulink.getAttribute('url')
text = handleElement(ulink)
# URL_HASH is created at the very beginning
if url:
return text + ' |%s|' % URL_HASH[url]
else:
print >> STDERR, "Warning: url attribute empty for [%s]" % text
return text
def handleIndexTerm(indexterm, width):
return ''
def handleEmphasis(emphasis, width):
return '_' + GetText(emphasis.childNodes) + '_'
###############################################################################
# A dictionary for mapping xml tags to functions.
###############################################################################
handlerMaps = {
'articleinfo': handleArticleInfo,
'table': handleTableRoot,
'informaltable': handleTableRoot,
'code': handleCode,
'programlisting': handleCode,
'list': handleList,
'simplelist': handleList,
'orderedlist': handleList,
'para': handleParagraph,
'formalpara': handleFormalParagraph,
'note': handleNote,
'link': handleLink,
'anchor': handleAnchor,
'section': handleSection,
'blockquote': handleBlockQuote,
'ulink': handleUlink,
'emphasis': handleEmphasis,
'indexterm': handleIndexTerm
}
inlineTags = {'tag': 1, 'literal': 1, 'link': 1,
'ulink': 1, 'citetitle': 1, 'indexterm': 1,
'emphasis': 1, 'filename': 1}
# helper functions for usage() and printerr()
def usage():
print __doc__
def printerr(statement):
if DEBUG:
print >> STDERR, statement
def replaceComment(matchobj):
initspace = matchobj.group(1)
firstsent = matchobj.group(2)
code = matchobj.group(3)
if len(initspace) > 0:
if initspace[0] == '<':
lastspace = initspace
else:
lastspace = '<' + initspace[:-1]
else:
lastspace = initspace
return '\n' + initspace + firstsent + ' >\n' + code + '\n' + lastspace
if __name__ == "__main__":
option = {}
try:
opts, args = getopt.getopt(sys.argv[1:], 'ds', ['prefix=', 'help'])
for oa, ov in opts:
option[oa] = ov
except getopt.GetoptError:
print >> STDERR, "Usage error: db2vim --help for usage"
sys.exit(1)
if '--help' in option:
usage()
sys.exit(0)
TOC_PREFIX = option.get('--prefix', 'ls_')
DEBUG = '-d' in option
if len(args) != 1:
print >> STDERR, "Usage error: db2vim --help for usage"
sys.exit(1)
fileName = args[0]
FILENAME = re.sub(r'\.\w+$', r'.txt', fileName)
try:
fp = open(fileName)
except:
print "Error opening xml file"
dom = xml.dom.minidom.parse(fp)
modeline = r'''
================================================================================
About this file
This file was created automatically from its XML variant using db2vim. db2vim is
a python script which understands a very limited subset of the Docbook XML 4.2
DTD and outputs a plain text file in vim help format.
db2vim can be obtained via anonymous CVS from sourceforge.net. Use
cvs -d:pserver:anonymous@cvs.vim-latex.sf.net:/cvsroot/vim-latex co db2vim
Or you can visit the web-interface to sourceforge CVS at:
http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/vim-latex/db2vim/
================================================================================'''
STRICT = '-s' in option
pattern = re.compile(
r'\n([< ]*)([^\n]+)&codebegin;\n(.*?)&codeend;', re.DOTALL)
processedDoc = handleElement(dom.documentElement)
while re.search('&codebegin;', processedDoc):
processedDoc = re.sub(pattern, replaceComment, processedDoc)
urlsection = r"""
================================================================================
URLs used in this file
"""
labels = zip(URL_HASH.values(), URL_HASH.keys())
labels.sort()
for label, url in labels:
urlsection += '*%s* : %s\n' % (label, url)
processedDoc = processedDoc + urlsection + modeline
print processedDoc.encode('iso-8859-1')
# vim:et:sts=4