schema v7: update description and description_html fields
1. Initialize description_html when description used to contain HTML; 2. strip HTML tags from description; 3. reset each podcast's cached HTTP Last-Modified/ETag values to force a re-fetch of description/description_html.
This commit is contained in:
parent
13c1b8e7db
commit
01e0655943
2 changed files with 21 additions and 0 deletions
|
@ -28,6 +28,8 @@ import shutil
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
from gpodder import util
|
||||||
|
|
||||||
EpisodeColumns = (
|
EpisodeColumns = (
|
||||||
'podcast_id',
|
'podcast_id',
|
||||||
'title',
|
'title',
|
||||||
|
@ -108,6 +110,9 @@ UPGRADE_SQL = [
|
||||||
# Version 7: Add HTML description
|
# Version 7: Add HTML description
|
||||||
(6, 7, """
|
(6, 7, """
|
||||||
ALTER TABLE episode ADD COLUMN description_html TEXT NOT NULL DEFAULT ''
|
ALTER TABLE episode ADD COLUMN description_html TEXT NOT NULL DEFAULT ''
|
||||||
|
UPDATE episode SET description_html=description WHERE is_html(description)
|
||||||
|
UPDATE episode SET description=remove_html_tags(description_html) WHERE is_html(description)
|
||||||
|
UPDATE podcast SET http_last_modified=NULL, http_etag=NULL
|
||||||
"""),
|
"""),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -194,6 +199,9 @@ def upgrade(db, filename):
|
||||||
initialize_database(db)
|
initialize_database(db)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
db.create_function('is_html', 1, util.is_html)
|
||||||
|
db.create_function('remove_html_tags', 1, util.remove_html_tags)
|
||||||
|
|
||||||
version = db.execute('SELECT version FROM version').fetchone()[0]
|
version = db.execute('SELECT version FROM version').fetchone()[0]
|
||||||
if version == CURRENT_VERSION:
|
if version == CURRENT_VERSION:
|
||||||
return
|
return
|
||||||
|
|
|
@ -595,6 +595,19 @@ def delete_file(filename):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# Matches something that looks like an opening HTML tag: "<", a tag name,
# then either whitespace-introduced attributes up to the next ">" or an
# optional "/" directly before ">" (so "<br>", "<br/>" and "<br />" all hit).
# DOTALL lets attributes span several lines; the lazy ".*?" stops at the
# first ">" instead of scanning to the end of the text.
_HTML_OPEN_TAG_RE = re.compile(
    r'<[a-z][a-z0-9]*(?:\s.*?>|/?>)', re.IGNORECASE | re.DOTALL)


def is_html(text):
    """Heuristically tell if text is HTML.

    The check looks for anything resembling an opening tag; it does not
    validate the markup.

    >>> is_html('<h1>HELLO</h1>')
    True
    >>> is_html('a < b < c')
    False
    >>> is_html('line one<br/>line two')
    True
    >>> is_html(None)
    False

    text: the string to inspect; None is treated as "not HTML" so the
        function can be called on SQL NULL values when registered as a
        SQLite function.
    Returns True if an opening tag is found, False otherwise.
    """
    if text is None:
        return False
    return _HTML_OPEN_TAG_RE.search(text) is not None
|
||||||
|
|
||||||
|
|
||||||
def remove_html_tags(html):
|
def remove_html_tags(html):
|
||||||
"""
|
"""
|
||||||
Remove HTML tags from a string and replace numeric and
|
Remove HTML tags from a string and replace numeric and
|
||||||
|
|
Loading…
Reference in a new issue