from the git log: * allow module to be imported * set UTF-8 as default encoding * better handling of Google Docs HTML * better handling of more edge-case inputs * nitpicky bugfixes to whitespace, emphasis, etc. * new config options
23 lines
895 B
Text
23 lines
895 B
Text
$NetBSD: patch-aa,v 1.8 2013/06/06 01:52:01 schmonz Exp $
|
|
|
|
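Small cleanup patch from Debian: replace newlines in image alt text with spaces, and take the list-closing branch only when a list is actually open.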
|
|
|
|
--- html2text.py.orig 2012-01-07 15:00:40.000000000 +0000
|
|
+++ html2text.py
|
|
@@ -479,6 +479,7 @@ class HTML2Text(HTMLParser.HTMLParser):
|
|
if has_key(attrs, 'src'):
|
|
attrs['href'] = attrs['src']
|
|
alt = attrs.get('alt', '')
|
|
+ alt = re.sub('\n', ' ', alt)
|
|
if self.inline_links:
|
|
self.o("![")
|
|
self.o(alt)
|
|
@@ -512,7 +513,7 @@ class HTML2Text(HTMLParser.HTMLParser):
|
|
list_style = tag
|
|
numbering_start = list_numbering_start(attrs)
|
|
self.list.append({'name':list_style, 'num':numbering_start})
|
|
- else:
|
|
+ elif self.list:
|
|
if self.list: self.list.pop()
|
|
self.lastWasList = True
|
|
else:
|