schema v7: update description and description_html fields
1. Initialize description_html from description where description contains HTML. 2. Strip HTML tags from description. 3. Reset each podcast's HTTP Last-Modified and ETag values to force a re-fetch of description/description_html from the feed.
This commit is contained in:
parent
13c1b8e7db
commit
01e0655943
|
@ -28,6 +28,8 @@ import shutil
|
|||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from gpodder import util
|
||||
|
||||
EpisodeColumns = (
|
||||
'podcast_id',
|
||||
'title',
|
||||
|
@ -108,6 +110,9 @@ UPGRADE_SQL = [
|
|||
# Version 7: Add HTML description
|
||||
(6, 7, """
|
||||
ALTER TABLE episode ADD COLUMN description_html TEXT NOT NULL DEFAULT ''
|
||||
UPDATE episode SET description_html=description WHERE is_html(description)
|
||||
UPDATE episode SET description=remove_html_tags(description_html) WHERE is_html(description)
|
||||
UPDATE podcast SET http_last_modified=NULL, http_etag=NULL
|
||||
"""),
|
||||
]
|
||||
|
||||
|
@ -194,6 +199,9 @@ def upgrade(db, filename):
|
|||
initialize_database(db)
|
||||
return
|
||||
|
||||
db.create_function('is_html', 1, util.is_html)
|
||||
db.create_function('remove_html_tags', 1, util.remove_html_tags)
|
||||
|
||||
version = db.execute('SELECT version FROM version').fetchone()[0]
|
||||
if version == CURRENT_VERSION:
|
||||
return
|
||||
|
|
|
@ -595,6 +595,19 @@ def delete_file(filename):
|
|||
pass
|
||||
|
||||
|
||||
# Matches something that looks like an opening (or self-closing) HTML tag:
# "<", a tag name starting with a letter, then either "/>" / ">" directly or
# whitespace followed by attributes up to the next ">".  DOTALL lets the
# attribute part span several lines; IGNORECASE accepts upper-case tag names.
_HTML_OPEN_TAG_RE = re.compile(
    r'<[a-z][a-z0-9]*(?:\s.*?>|/?>)', re.IGNORECASE | re.DOTALL)


def is_html(text):
    """Heuristically tell if text is HTML

    By looking for an open tag (more or less:)

    >>> is_html('<h1>HELLO</h1>')
    True
    >>> is_html('a < b < c')
    False
    >>> is_html('first line<br/>second line')
    True
    >>> is_html(None)
    False

    text: the string to inspect; None or an empty string is treated as
          "not HTML" (the function is also registered as an SQL function,
          where a NULL column value is passed in as None).

    Returns True if something resembling an HTML tag is found, else False.
    """
    if not text:
        return False
    return _HTML_OPEN_TAG_RE.search(text) is not None
|
||||
|
||||
|
||||
def remove_html_tags(html):
|
||||
"""
|
||||
Remove HTML tags from a string and replace numeric and
|
||||
|
|
Loading…
Reference in a new issue