716 lines
21 KiB
Python
716 lines
21 KiB
Python
#!/usr/bin/env python2
|
|
r"""
|
|
db2vim [options] file.xml
|
|
|
|
SHORT OPTIONS
|
|
|
|
-d Prints some debugging information on stderr.
|
|
|
|
-s If given, the db2vim operates in a 'strict' conversion mode, i.e., any
|
|
element which does not have a handler defined for it will be
|
|
completely ignored including all its children. Otherwise, db2vim will
|
|
recurse into an unknown tag and process any of its children it
|
|
recognizes. Since db2vim always recognizes text nodes, not using this
|
|
option has the effect that all text will be printed out, even if
|
|
somewhat incorrectly.
|
|
|
|
LONG OPTIONS
|
|
|
|
--prefix=<prefix>
|
|
This is a string like "ls_" which will be prepended to the section
|
|
numbers. Default to 'ls_' if unsupplied.
|
|
"""
|
|
|
|
|
|
import xml.dom.minidom
|
|
import getopt
|
|
import string
|
|
import re
|
|
import sys
|
|
|
|
# Okay. so I import *. Shoot me.
|
|
from textutils import *
|
|
from domutils import *
|
|
|
|
# Layout constants used when formatting the output text.
TEXT_WIDTH = 80
BLOCK_QUOTE = 4
COL_SPACE = 2

# Globals used in creating the table of contents. The three *_HASH tables
# below are keyed by section id:
#
#   TOC_HASH['section 1.1 label'] = 'ls_1_1'
#
#   LEVEL_HASH['section 1.1 label'] = 1
#       (top level article has level 0)
#
#   TITLE_HASH['section 1.1 label'] = 'Title of section 1.1'
#
#   FILENAME = the name of the file being processed with the last extension
#       changed to .txt
#
#   TOC_PREFIX = 'ls_' (the prefix used to create the section labels).
TOC_HASH = dict()
LEVEL_HASH = dict()
TITLE_HASH = dict()
FILENAME = ''
TOC_PREFIX = ''

# ANCHOR_HASH maps an element id to its generated anchor tag and URL_HASH maps
# a url to its generated label (see makeAnchorHash() and makeURLHash()).
ANCHOR_HASH = dict()
URL_HASH = dict()

# Run-time switches: DEBUG enables printerr() output on STDERR, STRICT makes
# handleElement() skip nodes it has no handler for.
DEBUG = 0
STDERR = sys.stderr
STRICT = 0

# Anchor counter. It lives in a one-entry dict so that functions can bump it
# with NUM_ANCHORS[0] += 1 without needing a 'global' statement.
NUM_ANCHORS = {0: 1}
|
|
|
|
###############################################################################
|
|
# Miscellaneous utility functions
|
|
###############################################################################
|
|
|
|
|
|
def encodeTo52(num):
    """
    Encode an integer as a string of ASCII letters.

    Values 0-25 map to 'a'-'z' and 26-51 map to 'A'-'Z'. Values of 52 and
    above are encoded recursively as encodeTo52(num // 52) followed by the
    single letter for num % 52.
    """
    # unichr only exists on Python 2; chr is its equivalent elsewhere.
    try:
        toChar = unichr
    except NameError:
        toChar = chr

    if num >= 52:
        # Floor division keeps the arithmetic exact for arbitrarily large
        # integers (int(num / 52) goes through a float under true division).
        return encodeTo52(num // 52) + encodeTo52(num % 52)
    if num < 26:
        return toChar(ord('a') + num)
    return toChar(ord('A') + num - 26)
|
|
|
|
|
|
def makeTocHash(rootElement, width, prefix='', level=0):
    """
    Fill the table-of-contents globals for the <section> children of
    rootElement, recursing into nested sections.

    For every section this records its numbered label in TOC_HASH, its
    nesting level in LEVEL_HASH, its rendered title in TITLE_HASH and a
    freshly generated anchor tag in ANCHOR_HASH, all keyed by the section id.
    Nested sections are processed with width reduced by 5, the current number
    appended to prefix and level increased by one.
    """
    sections = rootElement.getChildrenByTagName('section')
    for number, section in enumerate(sections, 1):
        heading = section.getChildrenByTagName('title')[0]
        renderedTitle = handleElement(heading, width)
        label = TOC_PREFIX + prefix + str(number)

        sectionid = section.getAttribute('id')
        if not sectionid:
            # A section without an id gets its generated label as its id.
            sectionid = label
            section.setAttribute('id', sectionid)

        NUM_ANCHORS[0] += 1
        anchorCode = encodeTo52(NUM_ANCHORS[0] + 52)
        ANCHOR_HASH[sectionid] = TOC_PREFIX + 'a_' + anchorCode

        TOC_HASH[sectionid] = label
        LEVEL_HASH[sectionid] = level
        TITLE_HASH[sectionid] = renderedTitle

        if section.getChildrenByTagName('section'):
            childPrefix = prefix + str(number) + '_'
            makeTocHash(section, width - 5, prefix=childPrefix,
                        level=level + 1)
|
|
|
|
|
|
def makeAnchorHash(rootElement):
    """
    Register an anchor tag in ANCHOR_HASH for every <anchor> and <note>
    element below rootElement which carries an 'id' attribute.

    A warning is written to sys.stderr when the id is already present in
    ANCHOR_HASH or TOC_HASH; the ANCHOR_HASH entry is overwritten regardless.
    """
    candidates = (rootElement.getElementsByTagName('anchor') +
                  rootElement.getElementsByTagName('note'))
    for node in candidates:
        anchorid = node.getAttribute('id')
        if not anchorid:
            continue

        NUM_ANCHORS[0] += 1
        if anchorid in ANCHOR_HASH or anchorid in TOC_HASH:
            sys.stderr.write(
                "Warning: anchor [%s] multiply defined\n" % anchorid)

        anchorCode = encodeTo52(NUM_ANCHORS[0] + 52)
        ANCHOR_HASH[anchorid] = TOC_PREFIX + 'a_' + anchorCode
|
|
|
|
|
|
def makeURLHash(rootElement):
    """
    Assign a numbered label (TOC_PREFIX + 'u_' + n) in URL_HASH to every
    distinct, non-empty 'url' attribute of the <ulink> elements below
    rootElement. Numbering starts at 1 and follows the order in which the
    urls are first seen.
    """
    count = 0
    for link in rootElement.getElementsByTagName('ulink'):
        target = link.getAttribute('url')
        if target and target not in URL_HASH:
            count += 1
            URL_HASH[target] = TOC_PREFIX + 'u_' + str(count)
|
|
|
|
|
|
def makeTOC(node, width, maxlevel=1):
    """
    Build the table-of-contents text for the <section> children of node.

    Each section whose level (from LEVEL_HASH) is at most maxlevel
    contributes a line '|label| title'. Sections whose level is below
    maxlevel and which contain sub-sections are followed by the table of
    contents of those sub-sections, passed through VertCatString(" ", 4, ...).
    Sections missing from LEVEL_HASH are treated as level 10.

    width is only used to derive the width handed to recursive calls
    (width - 5).
    """
    retText = ""

    for section in node.getChildrenByTagName('section'):
        sectionid = section.getAttribute('id')
        thisLabel = TOC_HASH.get(sectionid, '')
        titleText = TITLE_HASH.get(sectionid, '')
        level = LEVEL_HASH.get(sectionid, 10)

        if level <= maxlevel:
            retText += '|' + thisLabel + '| ' + titleText + '\n'

        if level < maxlevel and section.getChildrenByTagName('section'):
            # Propagate maxlevel: without it the recursion silently fell back
            # to the default of 1 and deeper levels could never be listed.
            childText = makeTOC(section, width - 5, maxlevel)
            retText += VertCatString(" ", 4, childText) + '\n'

        # Collapse whatever whitespace trails the text into one newline.
        retText = re.sub(r'\s+$', r'\n', retText)

    return retText
|
|
|
|
|
|
###############################################################################
|
|
# Generalized function for handling dom elements.
|
|
###############################################################################
|
|
|
|
|
|
def IsInlineTag(self):
    """
    Return 1 if the DOM node is a text node or an element whose tag name is
    listed (with a true value) in inlineTags, otherwise 0.

    Nodes which are neither text nor elements (comments, processing
    instructions, ...) have no tagName attribute; they are reported as not
    inline instead of raising AttributeError.
    """
    if self.nodeType == self.TEXT_NODE:
        return 1
    if self.nodeType != self.ELEMENT_NODE:
        return 0
    if inlineTags.get(self.tagName, 0):
        return 1
    return 0
|
|
|
|
|
|
def getChildrenByTagName(self, name):
    """
    Extension to the xml.dom.minidom.Element class. Returns a list of the
    direct child elements of this node whose node name equals name, in
    document order. Deeper descendants are not considered.
    """
    return [node for node in self.childNodes
            if node.nodeType == node.ELEMENT_NODE and node.nodeName == name]


# Make the helper available as a method on every minidom Element.
xml.dom.minidom.Element.getChildrenByTagName = getChildrenByTagName
|
|
|
|
|
|
def handleElement(rootElement, width=TEXT_WIDTH):
    """
    Generalized function to handle an Element node in a DOM tree.

    Walks the children of rootElement and returns the concatenated text
    produced for them:

    - a run of consecutive inline nodes (as judged by IsInlineTag) is
      collected into one string and passed through IndentParagraphs;
    - a non-inline element with an entry in handlerMaps is given to that
      handler;
    - a 'vimhelp' processing instruction is given to the handler named by
      its data, if there is one;
    - any other node is recursed into unless STRICT is set, in which case
      it is skipped.
    """
    output = ""
    node = rootElement.firstChild
    while node is not None:
        printerr('node type = %d' % node.nodeType)
        if node.nodeType == node.ELEMENT_NODE:
            printerr('processing [%s]' % node.tagName)

        if IsInlineTag(node):
            # Text node or inline element: gather all consecutive inline
            # siblings into a single paragraph and indent it. The inner loop
            # leaves 'node' on the first sibling that is not inline.
            paragraph = ""
            while node is not None and IsInlineTag(node):
                if node.nodeType == node.TEXT_NODE:
                    paragraph += node.data
                elif node.nodeType == node.ELEMENT_NODE:
                    if node.tagName in handlerMaps:
                        paragraph += handlerMaps[node.tagName](node, width)
                    else:
                        paragraph += GetText(node.childNodes)
                node = node.nextSibling

            output += IndentParagraphs(paragraph, width)
            continue

        if node.nodeType == node.ELEMENT_NODE \
                and node.tagName in handlerMaps:
            # An element we have a dedicated handler for.
            printerr('making recursive call to known child.')
            output += handlerMaps[node.tagName](node, width)

        elif node.nodeType == node.PROCESSING_INSTRUCTION_NODE \
                and node.target == 'vimhelp':
            if node.data in handlerMaps:
                output += handlerMaps[node.data](node, width)

        elif not STRICT:
            # Nothing is known about this node: process its children in the
            # hope that there is something to gather from them.
            printerr('making recursive call for unkown child')
            output += handleElement(node, width)

        # In strict mode an unknown node contributes nothing.
        node = node.nextSibling

    return output
|
|
|
|
|
|
###############################################################################
|
|
# Functions for handling various xml tags
|
|
###############################################################################
|
|
|
|
|
|
def handleArticleInfo(articleinfo, width):
    """
    Handler for <articleinfo>: build the document header.

    First fills the table-of-contents, anchor and URL tables from the parent
    of articleinfo (makeTocHash, makeAnchorHash, makeURLHash). The returned
    text consists of the centered title, file tag and author lines, the
    abstract (if any), the right-justified '*FILENAME-toc*' tag and the table
    of contents.

    Writes a message to STDERR and exits with status 1 if there is no
    <title> child.
    """
    article = articleinfo.parentNode
    makeTocHash(article, width)
    makeAnchorHash(article)
    makeURLHash(article)

    # getChildrenByTagName returns a (possibly empty) list, never None.
    title = articleinfo.getChildrenByTagName('title')
    if not title:
        STDERR.write("Article should have a title!\n")
        sys.exit(1)

    name = GetText(title[0].childNodes)

    authorText = ''
    for author in articleinfo.getChildrenByTagName('author'):
        # Reset all three parts for every author so that a missing element
        # neither raises nor inherits the value of the previous author.
        firstname = ''
        surname = ''
        email = ''
        if author.getElementsByTagName('firstname'):
            firstname = GetTextFromElementNode(author, 'firstname')[0]
        if author.getChildrenByTagName('surname'):
            surname = GetTextFromElementNode(author, 'surname')[0]
        if author.getElementsByTagName('email'):
            email = GetTextFromElementNode(author, 'email')[0]

        authorText += firstname + ' ' + surname
        if email:
            authorText += ' <' + email + '>'
        authorText += '\n'

    abstractText = ''
    abstract = articleinfo.getChildrenByTagName('abstract')
    if abstract:
        abstractText = '\n\n' + CenterText('Abstract\n========', width)
        abstractText += handleElement(abstract[0], width) + '\n'

    retText = CenterText(name + '\n*' + FILENAME + '*\n' + authorText, width)
    retText += abstractText

    toc = makeTOC(article, width)

    return retText + '\n' + RightJustify('*' + FILENAME + '-toc*', width) + \
        '\n' + toc
|
|
|
|
|
|
def handleOption(option, width):
    """
    Format an option element.

    Every text returned by GetTextFromElementNode(option, "name") yields a
    '*name*' tag right-justified to width on a line of its own. Below the
    tags the names, one per line, are combined through VertCatString with
    the text of the first <desc> child, which is rendered at width reduced
    by the longest name (plus one space).
    """
    names = GetTextFromElementNode(option, "name")

    tagLines = ""
    nameColumn = ""
    nameColumnWidth = -1
    for name in names:
        tagLines += ("*" + name + "*").rjust(width) + "\n"
        nameColumnWidth = max(nameColumnWidth, len(name + " "))
        nameColumn += name + " \n"

    desc = option.getChildrenByTagName("desc")[0]
    descText = handleElement(desc, width=width - nameColumnWidth)

    return tagLines + VertCatString(nameColumn + " ", None, descText) + "\n"
|
|
|
|
|
|
def handleOptionDefault(default, width):
    """
    Return the "type" texts of default, a tab, and the "extra" texts in
    parentheses. Multiple texts of one kind are joined with newlines.
    width is not used.
    """
    typeText = "\n".join(GetTextFromElementNode(default, "type"))
    extraText = "\n".join(GetTextFromElementNode(default, "extra"))
    return typeText + "\t(" + extraText + ")"
|
|
|
|
|
|
def handleTableRoot(root, width):
    """
    Handler for <table> and <informaltable>.

    Collects the <row> elements of the <thead> (optional) and <tbody> of the
    first <tgroup> child, lets calculateColumnWidthsDoublePass render the
    cells and choose the column widths, and formats head and body rows with
    FormatTable. A '~' is appended to every line of the head table.

    Returns '' when the table has no <tgroup> or no rows at all.
    """
    # getChildrenByTagName returns a list; check it before indexing.
    tgroups = root.getChildrenByTagName('tgroup')
    if not tgroups:
        return ''
    tgroup = tgroups[0]

    rows = []
    numHeadRows = 0
    theads = tgroup.getChildrenByTagName('thead')
    if theads:
        rows = theads[0].getChildrenByTagName('row')
        numHeadRows = len(rows)

    tbodies = tgroup.getChildrenByTagName('tbody')
    if tbodies:
        rows += tbodies[0].getChildrenByTagName('row')

    if not rows:
        return ''

    widths, text = calculateColumnWidthsDoublePass(rows, width)

    headText = text[0:numHeadRows]
    bodyText = text[numHeadRows:]

    headTable = FormatTable(headText, ROW_SPACE=1, COL_SPACE=COL_SPACE,
                            justify=0, widths=widths)
    if headTable:
        # Raw string: '\g' is not a valid escape in an ordinary literal.
        headTable = re.sub(r'\n|$', r'\g<0>~', headTable)
    bodyTable = FormatTable(bodyText, ROW_SPACE=1, COL_SPACE=COL_SPACE,
                            justify=0, widths=widths)

    return headTable + '\n' + re.sub(r'\n+$', '', bodyTable) + '\n\n'
|
|
|
|
|
|
def calculateColumnWidths(rows, alloc_widths):
    """
    Render every <entry> of the given rows and measure the columns.

    alloc_widths lists the width each column may use. A list with a single
    value means "this width for every column" and is expanded for each row
    to the number of entries in that row. The caller's list is never
    modified.

    Returns (widths, text): widths maps a column index to the width of the
    widest cell in that column (longest line plus COL_SPACE); text holds one
    list of rendered cell texts per row.
    """
    widths = {}
    text = []
    for row in rows:
        cols = row.getChildrenByTagName("entry")

        # Expand per row into a new list. An in-place '*=' would change the
        # caller's list, and a row without entries would shrink the
        # allocation to nothing for all rows after it.
        if len(alloc_widths) == 1:
            row_widths = alloc_widths * len(cols)
        else:
            row_widths = alloc_widths

        colwidths = []
        rowtext = []
        for col, width in zip(cols, row_widths):
            coltext = handleElement(col, width)
            rowtext.append(coltext)
            # This is the 'width' of the current cell including the
            # whitespace padding.
            colwidths.append(max(map(len, coltext.split("\n"))) + COL_SPACE)

        text.append(rowtext)

        # update the widths of the columns by finding the maximum
        # width of all cells in this column.
        for i, colwidth in enumerate(colwidths):
            widths[i] = max(colwidth, widths.get(i, -1))

    return widths, text
|
|
|
|
|
|
def calculateColumnWidthsDoublePass(rows, width):
    """
    Choose column widths for a table which should fit into width.

    A first pass lets every cell use the full width. If the resulting
    columns fit (or there are no columns at all) that result is returned.
    Otherwise every column wider than an equal share of width is limited to
    an equal share of the space left over by the narrower columns, and the
    cells are rendered again with those limits.

    Returns the (widths, text) pair of calculateColumnWidths.
    """
    maxwidths, text = calculateColumnWidths(rows, [width])
    # sum() copes with an empty table, for which reduce() raised TypeError.
    if not maxwidths or sum(maxwidths.values()) <= width:
        return maxwidths, text

    # Read the widths in column order instead of relying on dict ordering.
    columnwidths = [maxwidths[i] for i in sorted(maxwidths)]

    # now find out how many columns exceed the maximum permitted width.
    # nlarge: number of columns which are too wide.
    # remainingWidth: width which these large columns can share.
    fairshare = width // len(columnwidths)
    nlarge = 0
    remainingWidth = width
    for colwidth in columnwidths:
        if colwidth > fairshare:
            nlarge += 1
        else:
            remainingWidth -= colwidth

    # newmaxwidth: width which each of the large columns is allowed.
    newmaxwidth = remainingWidth // max(nlarge, 1)
    newcolwidths = [min(colwidth, newmaxwidth) for colwidth in columnwidths]

    # make another run and this time ask each cell to restrict itself to
    # the width calculated for its column above.
    return calculateColumnWidths(rows, newcolwidths)
|
|
|
|
|
|
def handleCode(code, width):
    """
    Handler for <code> and <programlisting>: wrap the text of the element,
    passed through VertCatString(" ", 4, ...), in the '&codebegin;' and
    '&codeend;' markers which the main script rewrites later on. width is
    not used.
    """
    listing = VertCatString(" ", 4, GetText(code.childNodes))
    return " &codebegin;\n" + listing + "&codeend;"
|
|
|
|
|
|
def handleList(list, width, marker=0):
    """
    Handler for <list>, <simplelist> and <orderedlist>.

    The items are the <listitem> children of an orderedlist and the <member>
    children of every other list. Items of an orderedlist are decorated with
    '1. ', '2. ', ...; items of a simplelist get no decoration and items of
    any other list get '- '. Each item is rendered at width reduced by the
    length of its decoration. marker is not used.
    """
    ordered = list.tagName == 'orderedlist'
    if ordered:
        child = 'listitem'
    else:
        child = 'member'
        if list.tagName == 'simplelist':
            decoration = ''
        else:
            decoration = '- '

    retText = ""
    for number, item in enumerate(list.getChildrenByTagName(child), 1):
        if ordered:
            decoration = str(number) + '. '

        itemText = handleElement(item, width - len(decoration))
        itemText = VertCatString(decoration, None, itemText)

        # Drop the whitespace trailing the item before adding it.
        retText += '\n' + re.sub(r'\s+$', '', itemText) + "\n"

    return retText
|
|
|
|
|
|
def handleNote(note, width):
    """
    Handler for <note>.

    A <title> child, if present, is removed from the note and printed as an
    underlined heading above the note text. If the note has an 'id', a
    right-justified line with the '*id*' tag and its ANCHOR_HASH tag comes
    first. The note text is rendered at width reduced by len("NOTE: ") and
    combined with the "NOTE: " label through VertCatString.
    """
    title = None
    titles = note.getChildrenByTagName('title')
    if titles:
        title = titles[0]
        name = GetText(title.childNodes)
        # Take the title out so it is not rendered again with the body.
        note.removeChild(title)

    noteid = ''
    idAttr = note.getAttribute('id')
    if idAttr:
        noteTagText = '*' + idAttr + '* ' + '*' + ANCHOR_HASH[idAttr] + '*'
        noteTagText = IndentParagraphs(noteTagText, width / 2)
        noteid = RightJustify(noteTagText, width) + '\n'

    noteText = handleElement(note, width - len("NOTE: "))
    if title is not None:
        noteText = name + '\n' + ('-' * len(name)) + '\n' + noteText

    return noteid + VertCatString("NOTE: ", None, noteText) + "\n"
|
|
|
|
|
|
def handleParagraph(paragraph, width):
    """
    Handler for <para>: render the children, strip the newlines at both ends
    of the result and terminate it with one empty line.
    """
    body = re.sub(r'\n+$', '', handleElement(paragraph, width))
    return re.sub(r'^\n+', '', body) + "\n\n"
|
|
|
|
|
|
def handleFormalParagraph(formalparagraph, width):
    """
    Handler for <formalpara>: like a plain paragraph, except that a <title>
    child is removed from the element and printed as an underlined heading
    above the paragraph text.
    """
    title = None
    titles = formalparagraph.getChildrenByTagName('title')
    if titles:
        title = titles[0]
        name = GetText(title.childNodes)
        # Take the title out so it is not rendered again with the body.
        formalparagraph.removeChild(title)

    body = re.sub(r'\n+$', '', handleElement(formalparagraph, width))
    body = re.sub(r'^\n+', '', body)
    if title is not None:
        body = name + '\n' + ('-' * len(name)) + '\n' + body

    return body + "\n\n"
|
|
|
|
|
|
def handleBlockQuote(block, width):
    """
    Handler for <blockquote>: render the children at width reduced by
    BLOCK_QUOTE and combine the result with BLOCK_QUOTE spaces through
    VertCatString.
    """
    quoted = handleElement(block, width - BLOCK_QUOTE)
    padding = " " * BLOCK_QUOTE
    return VertCatString(padding, BLOCK_QUOTE, quoted) + "\n"
|
|
|
|
|
|
def handleLink(link, width):
    """
    Handler for <link>: return the rendered link text followed by
    ' [|tag|]', where tag is the ANCHOR_HASH entry for the 'linkend'
    attribute. An unknown linkend produces a warning on STDERR and an empty
    tag.
    """
    linkend = link.getAttribute('linkend')
    known = linkend in ANCHOR_HASH
    if not known:
        print >> STDERR, "Warning: Link ID [%s] not found in TOC" % linkend

    text = handleElement(link, width)
    anchorpt = ANCHOR_HASH.get(linkend) or ''

    return text + ' [|' + anchorpt + '|]'
|
|
|
|
|
|
def handleAnchor(anchor, width):
    """
    Handler for <anchor>: return a right-justified line holding the '*id*'
    tag of the anchor and the tag registered for that id in ANCHOR_HASH.
    """
    anchorid = anchor.getAttribute('id')
    tags = '*' + anchorid + '* ' + '*' + ANCHOR_HASH[anchorid] + '*'
    return RightJustify(tags, width) + "\n"
|
|
|
|
|
|
def handleSection(section, width):
    """
    Handler for <section>.

    The output is a rule line (TEXT_WIDTH '=' characters for level 0
    sections, '-' for level 1, empty otherwise), a header made of the
    section title with the section tags next to it, a table of contents of
    the sub-sections (for levels up to 1) and finally the section text. The
    <title> child is removed from the section before the text is rendered.

    The tags are the TOC_HASH label, the ANCHOR_HASH tag and, when it differs
    from the label, the section id itself.
    """
    title = section.getChildrenByTagName('title')[0]
    name = handleElement(title, width)

    sectionid = section.getAttribute('id')
    tagsformatted = ''
    if sectionid in TOC_HASH:
        tagsformatted = '*%s* ' % TOC_HASH[sectionid]

    if sectionid in ANCHOR_HASH:
        tagsformatted += '*%s* ' % ANCHOR_HASH[sectionid]

    if sectionid and sectionid in TOC_HASH and \
            sectionid != TOC_HASH[sectionid]:
        tagsformatted += '*%s*' % sectionid

    # The tags are passed through IndentParagraphs with a width of 30.
    tagsformatted = RightJustify(IndentParagraphs(tagsformatted, 30), 0)
    tagswidth = TextWidth(tagsformatted)

    # width(name) + nspaces + width(tags) = TEXT_WIDTH, the same width the
    # rule line below is drawn with.
    if len(tagsformatted) > 2:
        header = VertCatString(name, TEXT_WIDTH - tagswidth, tagsformatted)
    else:
        header = name

    # Take the title out so it is not rendered again with the body.
    section.removeChild(title)
    text = handleElement(section, width)

    thislevel = LEVEL_HASH.get(sectionid, -1)
    if thislevel == 0:
        delim = '='
        newlines = '\n\n'
    elif thislevel == 1:
        delim = '-'
        newlines = '\n'
    else:
        delim = ''
        newlines = '\n'

    thisTOC = ''
    if thislevel <= 1:
        thisTOC = makeTOC(section, width, maxlevel=1)

    return "\n" + (delim * TEXT_WIDTH) + \
        "\n" + header + newlines + thisTOC + newlines + re.sub(
            r'\n+$', '', text) + "\n"
|
|
|
|
|
|
def handleUlink(ulink, width):
    """
    Handler for <ulink>: return the rendered link text followed by
    ' |label|', where label is the URL_HASH entry for the 'url' attribute.
    An empty url produces a warning on STDERR and the bare text.
    """
    url = ulink.getAttribute('url')
    # Render with the width we were given, as handleLink does, rather than
    # with the default width of handleElement.
    text = handleElement(ulink, width)
    # URL_HASH is created at the very beginning
    if url:
        return text + ' |%s|' % URL_HASH[url]

    print >> STDERR, "Warning: url attribute empty for [%s]" % text
    return text
|
|
|
|
|
|
def handleIndexTerm(indexterm, width):
    """
    Handler for <indexterm>: index terms produce no output, so this always
    returns the empty string.
    """
    nothing = ''
    return nothing
|
|
|
|
|
|
def handleEmphasis(emphasis, width):
    """
    Handler for <emphasis>: return the text of the element enclosed in
    underscores. width is not used.
    """
    emphasized = GetText(emphasis.childNodes)
    return '_' + emphasized + '_'
|
|
|
|
###############################################################################
|
|
# A dictionary for mapping xml tags to functions.
|
|
###############################################################################
|
|
# Maps a tag name (or the data of a 'vimhelp' processing instruction) to the
# function which renders it. Every handler is called as handler(node, width).
handlerMaps = dict(
    # document structure
    articleinfo=handleArticleInfo,
    section=handleSection,
    # block level elements
    para=handleParagraph,
    formalpara=handleFormalParagraph,
    blockquote=handleBlockQuote,
    note=handleNote,
    code=handleCode,
    programlisting=handleCode,
    table=handleTableRoot,
    informaltable=handleTableRoot,
    list=handleList,
    simplelist=handleList,
    orderedlist=handleList,
    # links, anchors and inline elements
    link=handleLink,
    ulink=handleUlink,
    anchor=handleAnchor,
    emphasis=handleEmphasis,
    indexterm=handleIndexTerm,
)

# Tag names which IsInlineTag() reports as inline.
inlineTags = dict.fromkeys(
    ['tag', 'literal', 'link', 'ulink', 'citetitle', 'indexterm',
     'emphasis', 'filename'], 1)
|
|
|
|
# helper functions for usage() and printerr()
|
|
|
|
|
|
def usage():
    """
    Print the module docstring, which doubles as the usage message, on
    standard output.
    """
    # Call form of print, as used in handleArticleInfo(); with a single
    # argument it prints exactly what the print statement printed.
    print(__doc__)
|
|
|
|
|
|
def printerr(statement):
    """
    Print statement on STDERR, but only when DEBUG is set.
    """
    if not DEBUG:
        return
    print >> STDERR, statement
|
|
|
|
|
|
def replaceComment(matchobj):
    """
    Replacement function for the '&codebegin;' ... '&codeend;' pattern used
    by the main script.

    Group 1 of the match is the leading run of '<' and ' ' characters of the
    line introducing the code, group 2 the rest of that line and group 3 the
    code itself. The result ends the introducing line with ' >', keeps the
    code and starts the line after it with the closing marker: the leading
    run itself if it is empty or already starts with '<', otherwise '<'
    followed by the run shortened by one character.
    """
    initspace, firstsent, code = matchobj.group(1, 2, 3)

    if initspace and initspace[0] != '<':
        closing = '<' + initspace[:-1]
    else:
        closing = initspace

    return ''.join(
        ['\n', initspace, firstsent, ' >\n', code, '\n', closing])
|
|
|
|
|
|
# Script entry point: parse the command line, convert the given DocBook file
# and print the result (encoded as iso-8859-1) on standard output.
if __name__ == "__main__":
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'ds', ['prefix=', 'help'])
    except getopt.GetoptError:
        print >> STDERR, "Usage error: db2vim --help for usage"
        sys.exit(1)
    option = dict(opts)

    if '--help' in option:
        usage()
        sys.exit(0)

    TOC_PREFIX = option.get('--prefix', 'ls_')
    DEBUG = '-d' in option
    STRICT = '-s' in option

    if len(args) != 1:
        print >> STDERR, "Usage error: db2vim --help for usage"
        sys.exit(1)

    fileName = args[0]
    FILENAME = re.sub(r'\.\w+$', r'.txt', fileName)

    # Without an input file there is nothing to do: report the failure on
    # STDERR and stop instead of running into an undefined 'fp' below.
    try:
        fp = open(fileName)
    except IOError:
        print >> STDERR, "Error opening xml file"
        sys.exit(1)

    # Close the file once it has been parsed, even if parsing fails.
    try:
        dom = xml.dom.minidom.parse(fp)
    finally:
        fp.close()

    modeline = r'''
================================================================================
About this file

This file was created automatically from its XML variant using db2vim. db2vim is
a python script which understands a very limited subset of the Docbook XML 4.2
DTD and outputs a plain text file in vim help format.

db2vim can be obtained via anonymous CVS from sourceforge.net. Use

cvs -d:pserver:anonymous@cvs.vim-latex.sf.net:/cvsroot/vim-latex co db2vim

Or you can visit the web-interface to sourceforge CVS at:
http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/vim-latex/db2vim/
================================================================================'''

    pattern = re.compile(
        r'\n([< ]*)([^\n]+)&codebegin;\n(.*?)&codeend;', re.DOTALL)

    processedDoc = handleElement(dom.documentElement)
    # One pass can leave markers behind (a code block inside another one),
    # so repeat until none are left.
    while re.search('&codebegin;', processedDoc):
        processedDoc, replaced = pattern.subn(replaceComment, processedDoc)
        if not replaced:
            # A marker the pattern cannot match (for example at the very
            # start of the text) would otherwise keep this loop spinning
            # forever.
            break

    urlsection = r"""
================================================================================
URLs used in this file

"""
    # List the URLs ordered by their generated label.
    labels = sorted(zip(URL_HASH.values(), URL_HASH.keys()))
    for label, url in labels:
        urlsection += '*%s* : %s\n' % (label, url)

    processedDoc = processedDoc + urlsection + modeline
    print(processedDoc.encode('iso-8859-1'))
|
|
# vim:et:sts=4
|