Merge pull request #277 from vossad01/html_description-migration
Data migration of `description` column during migration to v7 schema
This commit is contained in:
commit
f14c465d81
|
@ -147,11 +147,11 @@ class PodcastEpisode(PodcastModelObject):
|
|||
episode.description = entry['description']
|
||||
if entry.get('description_html'):
|
||||
episode.description_html = entry['description_html']
|
||||
# XXX: That's not a very well-informed heuristic to check
|
||||
# if the description already contains HTML. Better ideas?
|
||||
# TODO: This really should be handled in podcastparser and not here.
|
||||
elif '<' in entry['description']:
|
||||
elif util.is_html(entry['description']):
|
||||
episode.description_html = entry['description']
|
||||
episode.description = util.remove_html_tags(entry['description'])
|
||||
|
||||
episode.total_time = entry['total_time']
|
||||
episode.published = entry['published']
|
||||
episode.payment_url = entry['payment_url']
|
||||
|
|
|
@ -28,6 +28,8 @@ import shutil
|
|||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from gpodder import util
|
||||
|
||||
EpisodeColumns = (
|
||||
'podcast_id',
|
||||
'title',
|
||||
|
@ -108,6 +110,9 @@ UPGRADE_SQL = [
|
|||
# Version 7: Add HTML description
|
||||
(6, 7, """
|
||||
ALTER TABLE episode ADD COLUMN description_html TEXT NOT NULL DEFAULT ''
|
||||
UPDATE episode SET description_html=description WHERE is_html(description)
|
||||
UPDATE episode SET description=remove_html_tags(description_html) WHERE is_html(description)
|
||||
UPDATE podcast SET http_last_modified=NULL, http_etag=NULL
|
||||
"""),
|
||||
]
|
||||
|
||||
|
@ -194,6 +199,9 @@ def upgrade(db, filename):
|
|||
initialize_database(db)
|
||||
return
|
||||
|
||||
db.create_function('is_html', 1, util.is_html)
|
||||
db.create_function('remove_html_tags', 1, util.remove_html_tags)
|
||||
|
||||
version = db.execute('SELECT version FROM version').fetchone()[0]
|
||||
if version == CURRENT_VERSION:
|
||||
return
|
||||
|
|
|
@ -595,6 +595,19 @@ def delete_file(filename):
|
|||
pass
|
||||
|
||||
|
||||
# Matches something that looks like an opening (or self-closing) HTML tag:
# "<", a tag name starting with a letter, then either whitespace followed by
# anything up to the next ">" (attributes), or an optional "/" and ">".
# Compiled once at import time because is_html() may be invoked per database
# row when it is registered as an SQL function.
_HTML_OPEN_TAG_RE = re.compile(r'<[a-z][a-z0-9]*(?:\s.*?>|/?>)',
                               re.IGNORECASE | re.DOTALL)


def is_html(text):
    """Heuristically tell if text is HTML

    The check looks for something resembling an opening tag; it does not
    validate the markup.

    text -- the string to inspect; None or an empty string is treated as
            "not HTML" (a NULL column value reaches a user-defined SQL
            function as None).

    Returns True if an opening tag is found, False otherwise.

    >>> is_html('<h1>HELLO</h1>')
    True
    >>> is_html('a < b < c')
    False
    >>> is_html('')
    False
    >>> is_html(None)
    False
    """
    # Guard against None/empty input instead of letting re raise TypeError,
    # which would abort a caller iterating over many values.
    if not text:
        return False

    return bool(_HTML_OPEN_TAG_RE.search(text))
|
||||
|
||||
|
||||
def remove_html_tags(html):
|
||||
"""
|
||||
Remove HTML tags from a string and replace numeric and
|
||||
|
|
Loading…
Reference in New Issue