Generate HTML episode description only when needed.

PR #1094 generated an HTML description for any episode that lacked one.
That, however, increased the database size (almost doubling it in the
worst case) because both the text and HTML versions of each description
were stored. This commit fixes that by storing the episode thumbnail URL
in the database and generating the HTML description only when shownotes
are drawn.

The text description is now cleared for episodes with an HTML
description. This further reduces database size for feeds that provide
both. It also fixes an issue for feeds whose text and HTML descriptions
differ: the short description showed the text description while the
shownotes showed the HTML description, and EQL searched only the text
descriptions, which might not match what the user sees in the shownotes.
This commit is contained in:
auouymous 2022-03-20 19:03:46 -06:00
parent 97d9459b90
commit 0fe74c18d2
8 changed files with 40 additions and 26 deletions

View File

@ -261,7 +261,7 @@ class gPodderExtension:
else:
info['title'] = title
info['subtitle'] = episode.description
info['subtitle'] = episode._text_description
if self.container.config.genre_tag is not None:
info['genre'] = self.container.config.genre_tag

View File

@ -211,8 +211,6 @@ class YoutubeFeed(model.Feed):
episodes = []
for en in self._ie_result['entries']:
guid = video_guid(en['id'])
description = util.remove_html_tags(en.get('description') or _('No description available'))
html_description = util.nice_html_description(en.get('thumbnail'), description)
if en.get('ext'):
mime_type = util.mimetype_from_extension('.{}'.format(en['ext']))
else:
@ -225,8 +223,9 @@ class YoutubeFeed(model.Feed):
ep = {
'title': en.get('title', guid),
'link': en.get('webpage_url'),
'description': description,
'description_html': html_description,
'episode_art_url': en.get('thumbnail'),
'description': util.remove_html_tags(en.get('description') or ''),
'description_html': '',
'url': en.get('webpage_url'),
'file_size': filesize,
'mime_type': mime_type,

View File

@ -113,7 +113,7 @@ class DBusPodcastsProxy(dbus.service.Object):
def episode_to_tuple(episode):
title = safe_str(episode.title)
url = safe_str(episode.url)
description = safe_first_line(episode.description)
description = safe_first_line(episode._text_description)
filename = safe_str(episode.download_filename)
file_type = safe_str(episode.file_type())
is_new = (episode.state == gpodder.STATE_NORMAL and episode.is_new)

View File

@ -219,7 +219,7 @@ class gPodderShownotesText(gPodderShownotes):
self.text_buffer.insert_at_cursor('\n')
self.text_buffer.insert_with_tags_by_name(self.text_buffer.get_end_iter(), details, 'details')
self.text_buffer.insert_at_cursor('\n\n')
for target, text in util.extract_hyperlinked_text(episode.description_html or episode.description):
for target, text in util.extract_hyperlinked_text(episode.html_description()):
hyperlinks.append((self.text_buffer.get_char_count(), target))
if target:
self.text_buffer.insert_with_tags_by_name(
@ -349,13 +349,10 @@ class gPodderShownotesHTML(gPodderShownotes):
'duration': episode.get_play_info_string()})
header_html = _('<div id="gpodder-title">\n%(heading)s\n<p>%(subheading)s</p>\n<p>%(details)s</p></div>\n') \
% dict(heading=heading, subheading=subheading, details=details)
description_html = episode.description_html
if not description_html:
description_html = re.sub(r'\n', '<br>\n', episode.description)
# uncomment to prevent background override in html shownotes
# self.manager.remove_all_style_sheets ()
logger.debug("base uri: %s (chan:%s)", self._base_uri, episode.channel.url)
self.html_view.load_html(header_html + description_html, self._base_uri)
self.html_view.load_html(header_html + episode.html_description(), self._base_uri)
# uncomment to show web inspector
# self.html_view.get_inspector().show()
self.episode = episode

View File

@ -175,12 +175,14 @@ class PodcastParserFeed(Feed):
existing_episode.total_time = youtube.get_total_time(episode)
existing_episode.update_from(episode)
existing_episode.cache_text_description()
existing_episode.save()
continue
elif episode.total_time == 0 and 'youtube' in episode.url:
# query duration for new youtube episodes
episode.total_time = youtube.get_total_time(episode)
episode.cache_text_description()
episode.save()
new_episodes.append(episode)
return new_episodes, seen_guids
@ -271,7 +273,7 @@ class PodcastEpisode(PodcastModelObject):
MAX_FILENAME_LENGTH = 120 # without extension
MAX_FILENAME_WITH_EXT_LENGTH = 140 - len(".partial.webm") # with extension
__slots__ = schema.EpisodeColumns + ('_download_error',)
__slots__ = schema.EpisodeColumns + ('_download_error', '_text_description',)
def _deprecated(self):
raise Exception('Property is deprecated!')
@ -289,13 +291,13 @@ class PodcastEpisode(PodcastModelObject):
episode.guid = entry['guid']
episode.title = entry['title']
episode.link = entry['link']
episode.description = entry['description']
episode.episode_art_url = entry.get('episode_art_url')
if entry.get('description_html'):
episode.description = ''
episode.description_html = entry['description_html']
else:
thumbnail = entry.get('episode_art_url')
description = util.remove_html_tags(episode.description or _('No description available'))
episode.description_html = util.nice_html_description(thumbnail, description)
episode.description = util.remove_html_tags(entry['description'] or '')
episode.description_html = ''
episode.total_time = entry['total_time']
episode.published = entry['published']
@ -390,6 +392,7 @@ class PodcastEpisode(PodcastModelObject):
self.last_playback = 0
self._download_error = None
self._text_description = ''
@property
def channel(self):
@ -574,9 +577,21 @@ class PodcastEpisode(PodcastModelObject):
age_prop = property(fget=get_age_string)
def cache_text_description(self):
if self.description:
self._text_description = self.description
elif self.description_html:
self._text_description = util.remove_html_tags(self.description_html)
else:
self._text_description = ''
def html_description(self):
return self.description_html \
or util.nice_html_description(self.episode_art_url, self.description or _('No description available'))
def one_line_description(self):
MAX_LINE_LENGTH = 120
desc = util.remove_html_tags(self.description or '')
desc = self._text_description
desc = re.sub(r'\s+', ' ', desc).strip()
if not desc:
return _('No description available')
@ -1104,7 +1119,9 @@ class PodcastChannel(PodcastModelObject):
Returns: A new PodcastEpisode object
"""
return self.EpisodeClass.create_from_dict(d, self)
episode = self.EpisodeClass.create_from_dict(d, self)
episode.cache_text_description()
return episode
def _consume_updated_title(self, new_title):
# Replace multi-space and newlines with single space (Maemo bug 11173)

View File

@ -167,7 +167,8 @@ class SoundcloudUser(object):
yield {
'title': track.get('title', track.get('permalink')) or _('Unknown track'),
'link': track.get('permalink_url') or 'https://soundcloud.com/' + self.username,
'description': track.get('description') or _('No description available'),
'description': util.remove_html_tags(track.get('description') or ''),
'description_html': '',
'url': url,
'file_size': int(filesize),
'mime_type': filetype,

View File

@ -46,7 +46,7 @@ class Matcher(object):
return (needle in haystack)
if needle in self._episode.title:
return True
return (needle in self._episode.description)
return (needle in self._episode._text_description)
# case-insensitive search in haystack, or both title and description if no haystack
def s(needle, haystack=None):
@ -55,7 +55,7 @@ class Matcher(object):
return (needle in haystack.casefold())
if needle in self._episode.title.casefold():
return True
return (needle in self._episode.description.casefold())
return (needle in self._episode._text_description.casefold())
# case-sensitive regular expression search in haystack, or both title and description if no haystack
def R(needle, haystack=None):
@ -64,7 +64,7 @@ class Matcher(object):
return regexp.search(haystack)
if regexp.search(self._episode.title):
return True
return regexp.search(self._episode.description)
return regexp.search(self._episode._text_description)
# case-insensitive regular expression search in haystack, or both title and description if no haystack
def r(needle, haystack=None):
@ -73,7 +73,7 @@ class Matcher(object):
return regexp.search(haystack)
if regexp.search(self._episode.title):
return True
return regexp.search(self._episode.description)
return regexp.search(self._episode._text_description)
return bool(eval(term, {'__builtins__': None, 'S': S, 's': s, 'R': R, 'r': r}, self))
except Exception as e:
@ -108,7 +108,7 @@ class Matcher(object):
elif k == 'title':
return episode.title
elif k == 'description':
return episode.description
return episode._text_description
elif k == 'since':
return (datetime.datetime.now() - datetime.datetime.fromtimestamp(episode.published)).days
elif k == 'age':
@ -215,7 +215,7 @@ class EQL(object):
if self._regex:
return re.search(self._query, episode.title, self._flags) is not None
elif self._string:
return self._query in episode.title.lower() or self._query in episode.description.lower()
return self._query in episode.title.lower() or self._query in episode._text_description.lower()
return Matcher(episode).match(self._query)

View File

@ -396,7 +396,7 @@ class iPodDevice(Device):
return False
track = self.ipod.add_track(local_filename, episode.title, episode.channel.title,
util.remove_html_tags(episode.description), episode.url, episode.channel.url,
episode._text_description, episode.url, episode.channel.url,
episode.published, get_track_length(local_filename), episode.file_type() == 'audio')
self.update_from_episode(track, episode, initial=True)