Generate HTML episode description only when needed.
PR #1094 generated an HTML description for any episode that lacked one. That, however, increased the database size (almost doubling it in the worst case) because both the text and HTML versions of each description were stored. This commit fixes that by storing the episode thumbnail URL in the database and generating the HTML description only when the shownotes are drawn. The text description is now cleared for episodes that have an HTML description, which further reduces the database size for feeds that provide both. It also fixes an issue with feeds that provide different text and HTML descriptions: previously the short description showed the text description while the shownotes showed the HTML description, and EQL searched only the text descriptions, which might not match what the user sees in the shownotes.
This commit is contained in:
parent
97d9459b90
commit
0fe74c18d2
|
@ -261,7 +261,7 @@ class gPodderExtension:
|
|||
else:
|
||||
info['title'] = title
|
||||
|
||||
info['subtitle'] = episode.description
|
||||
info['subtitle'] = episode._text_description
|
||||
|
||||
if self.container.config.genre_tag is not None:
|
||||
info['genre'] = self.container.config.genre_tag
|
||||
|
|
|
@ -211,8 +211,6 @@ class YoutubeFeed(model.Feed):
|
|||
episodes = []
|
||||
for en in self._ie_result['entries']:
|
||||
guid = video_guid(en['id'])
|
||||
description = util.remove_html_tags(en.get('description') or _('No description available'))
|
||||
html_description = util.nice_html_description(en.get('thumbnail'), description)
|
||||
if en.get('ext'):
|
||||
mime_type = util.mimetype_from_extension('.{}'.format(en['ext']))
|
||||
else:
|
||||
|
@ -225,8 +223,9 @@ class YoutubeFeed(model.Feed):
|
|||
ep = {
|
||||
'title': en.get('title', guid),
|
||||
'link': en.get('webpage_url'),
|
||||
'description': description,
|
||||
'description_html': html_description,
|
||||
'episode_art_url': en.get('thumbnail'),
|
||||
'description': util.remove_html_tags(en.get('description') or ''),
|
||||
'description_html': '',
|
||||
'url': en.get('webpage_url'),
|
||||
'file_size': filesize,
|
||||
'mime_type': mime_type,
|
||||
|
|
|
@ -113,7 +113,7 @@ class DBusPodcastsProxy(dbus.service.Object):
|
|||
def episode_to_tuple(episode):
|
||||
title = safe_str(episode.title)
|
||||
url = safe_str(episode.url)
|
||||
description = safe_first_line(episode.description)
|
||||
description = safe_first_line(episode._text_description)
|
||||
filename = safe_str(episode.download_filename)
|
||||
file_type = safe_str(episode.file_type())
|
||||
is_new = (episode.state == gpodder.STATE_NORMAL and episode.is_new)
|
||||
|
|
|
@ -219,7 +219,7 @@ class gPodderShownotesText(gPodderShownotes):
|
|||
self.text_buffer.insert_at_cursor('\n')
|
||||
self.text_buffer.insert_with_tags_by_name(self.text_buffer.get_end_iter(), details, 'details')
|
||||
self.text_buffer.insert_at_cursor('\n\n')
|
||||
for target, text in util.extract_hyperlinked_text(episode.description_html or episode.description):
|
||||
for target, text in util.extract_hyperlinked_text(episode.html_description()):
|
||||
hyperlinks.append((self.text_buffer.get_char_count(), target))
|
||||
if target:
|
||||
self.text_buffer.insert_with_tags_by_name(
|
||||
|
@ -349,13 +349,10 @@ class gPodderShownotesHTML(gPodderShownotes):
|
|||
'duration': episode.get_play_info_string()})
|
||||
header_html = _('<div id="gpodder-title">\n%(heading)s\n<p>%(subheading)s</p>\n<p>%(details)s</p></div>\n') \
|
||||
% dict(heading=heading, subheading=subheading, details=details)
|
||||
description_html = episode.description_html
|
||||
if not description_html:
|
||||
description_html = re.sub(r'\n', '<br>\n', episode.description)
|
||||
# uncomment to prevent background override in html shownotes
|
||||
# self.manager.remove_all_style_sheets ()
|
||||
logger.debug("base uri: %s (chan:%s)", self._base_uri, episode.channel.url)
|
||||
self.html_view.load_html(header_html + description_html, self._base_uri)
|
||||
self.html_view.load_html(header_html + episode.html_description(), self._base_uri)
|
||||
# uncomment to show web inspector
|
||||
# self.html_view.get_inspector().show()
|
||||
self.episode = episode
|
||||
|
|
|
@ -175,12 +175,14 @@ class PodcastParserFeed(Feed):
|
|||
existing_episode.total_time = youtube.get_total_time(episode)
|
||||
|
||||
existing_episode.update_from(episode)
|
||||
existing_episode.cache_text_description()
|
||||
existing_episode.save()
|
||||
continue
|
||||
elif episode.total_time == 0 and 'youtube' in episode.url:
|
||||
# query duration for new youtube episodes
|
||||
episode.total_time = youtube.get_total_time(episode)
|
||||
|
||||
episode.cache_text_description()
|
||||
episode.save()
|
||||
new_episodes.append(episode)
|
||||
return new_episodes, seen_guids
|
||||
|
@ -271,7 +273,7 @@ class PodcastEpisode(PodcastModelObject):
|
|||
MAX_FILENAME_LENGTH = 120 # without extension
|
||||
MAX_FILENAME_WITH_EXT_LENGTH = 140 - len(".partial.webm") # with extension
|
||||
|
||||
__slots__ = schema.EpisodeColumns + ('_download_error',)
|
||||
__slots__ = schema.EpisodeColumns + ('_download_error', '_text_description',)
|
||||
|
||||
def _deprecated(self):
|
||||
raise Exception('Property is deprecated!')
|
||||
|
@ -289,13 +291,13 @@ class PodcastEpisode(PodcastModelObject):
|
|||
episode.guid = entry['guid']
|
||||
episode.title = entry['title']
|
||||
episode.link = entry['link']
|
||||
episode.description = entry['description']
|
||||
episode.episode_art_url = entry.get('episode_art_url')
|
||||
if entry.get('description_html'):
|
||||
episode.description = ''
|
||||
episode.description_html = entry['description_html']
|
||||
else:
|
||||
thumbnail = entry.get('episode_art_url')
|
||||
description = util.remove_html_tags(episode.description or _('No description available'))
|
||||
episode.description_html = util.nice_html_description(thumbnail, description)
|
||||
episode.description = util.remove_html_tags(entry['description'] or '')
|
||||
episode.description_html = ''
|
||||
|
||||
episode.total_time = entry['total_time']
|
||||
episode.published = entry['published']
|
||||
|
@ -390,6 +392,7 @@ class PodcastEpisode(PodcastModelObject):
|
|||
self.last_playback = 0
|
||||
|
||||
self._download_error = None
|
||||
self._text_description = ''
|
||||
|
||||
@property
|
||||
def channel(self):
|
||||
|
@ -574,9 +577,21 @@ class PodcastEpisode(PodcastModelObject):
|
|||
|
||||
age_prop = property(fget=get_age_string)
|
||||
|
||||
def cache_text_description(self):
|
||||
if self.description:
|
||||
self._text_description = self.description
|
||||
elif self.description_html:
|
||||
self._text_description = util.remove_html_tags(self.description_html)
|
||||
else:
|
||||
self._text_description = ''
|
||||
|
||||
def html_description(self):
|
||||
return self.description_html \
|
||||
or util.nice_html_description(self.episode_art_url, self.description or _('No description available'))
|
||||
|
||||
def one_line_description(self):
|
||||
MAX_LINE_LENGTH = 120
|
||||
desc = util.remove_html_tags(self.description or '')
|
||||
desc = self._text_description
|
||||
desc = re.sub(r'\s+', ' ', desc).strip()
|
||||
if not desc:
|
||||
return _('No description available')
|
||||
|
@ -1104,7 +1119,9 @@ class PodcastChannel(PodcastModelObject):
|
|||
|
||||
Returns: A new PodcastEpisode object
|
||||
"""
|
||||
return self.EpisodeClass.create_from_dict(d, self)
|
||||
episode = self.EpisodeClass.create_from_dict(d, self)
|
||||
episode.cache_text_description()
|
||||
return episode
|
||||
|
||||
def _consume_updated_title(self, new_title):
|
||||
# Replace multi-space and newlines with single space (Maemo bug 11173)
|
||||
|
|
|
@ -167,7 +167,8 @@ class SoundcloudUser(object):
|
|||
yield {
|
||||
'title': track.get('title', track.get('permalink')) or _('Unknown track'),
|
||||
'link': track.get('permalink_url') or 'https://soundcloud.com/' + self.username,
|
||||
'description': track.get('description') or _('No description available'),
|
||||
'description': util.remove_html_tags(track.get('description') or ''),
|
||||
'description_html': '',
|
||||
'url': url,
|
||||
'file_size': int(filesize),
|
||||
'mime_type': filetype,
|
||||
|
|
|
@ -46,7 +46,7 @@ class Matcher(object):
|
|||
return (needle in haystack)
|
||||
if needle in self._episode.title:
|
||||
return True
|
||||
return (needle in self._episode.description)
|
||||
return (needle in self._episode._text_description)
|
||||
|
||||
# case-insensitive search in haystack, or both title and description if no haystack
|
||||
def s(needle, haystack=None):
|
||||
|
@ -55,7 +55,7 @@ class Matcher(object):
|
|||
return (needle in haystack.casefold())
|
||||
if needle in self._episode.title.casefold():
|
||||
return True
|
||||
return (needle in self._episode.description.casefold())
|
||||
return (needle in self._episode._text_description.casefold())
|
||||
|
||||
# case-sensitive regular expression search in haystack, or both title and description if no haystack
|
||||
def R(needle, haystack=None):
|
||||
|
@ -64,7 +64,7 @@ class Matcher(object):
|
|||
return regexp.search(haystack)
|
||||
if regexp.search(self._episode.title):
|
||||
return True
|
||||
return regexp.search(self._episode.description)
|
||||
return regexp.search(self._episode._text_description)
|
||||
|
||||
# case-insensitive regular expression search in haystack, or both title and description if no haystack
|
||||
def r(needle, haystack=None):
|
||||
|
@ -73,7 +73,7 @@ class Matcher(object):
|
|||
return regexp.search(haystack)
|
||||
if regexp.search(self._episode.title):
|
||||
return True
|
||||
return regexp.search(self._episode.description)
|
||||
return regexp.search(self._episode._text_description)
|
||||
|
||||
return bool(eval(term, {'__builtins__': None, 'S': S, 's': s, 'R': R, 'r': r}, self))
|
||||
except Exception as e:
|
||||
|
@ -108,7 +108,7 @@ class Matcher(object):
|
|||
elif k == 'title':
|
||||
return episode.title
|
||||
elif k == 'description':
|
||||
return episode.description
|
||||
return episode._text_description
|
||||
elif k == 'since':
|
||||
return (datetime.datetime.now() - datetime.datetime.fromtimestamp(episode.published)).days
|
||||
elif k == 'age':
|
||||
|
@ -215,7 +215,7 @@ class EQL(object):
|
|||
if self._regex:
|
||||
return re.search(self._query, episode.title, self._flags) is not None
|
||||
elif self._string:
|
||||
return self._query in episode.title.lower() or self._query in episode.description.lower()
|
||||
return self._query in episode.title.lower() or self._query in episode._text_description.lower()
|
||||
|
||||
return Matcher(episode).match(self._query)
|
||||
|
||||
|
|
|
@ -396,7 +396,7 @@ class iPodDevice(Device):
|
|||
return False
|
||||
|
||||
track = self.ipod.add_track(local_filename, episode.title, episode.channel.title,
|
||||
util.remove_html_tags(episode.description), episode.url, episode.channel.url,
|
||||
episode._text_description, episode.url, episode.channel.url,
|
||||
episode.published, get_track_length(local_filename), episode.file_type() == 'audio')
|
||||
|
||||
self.update_from_episode(track, episode, initial=True)
|
||||
|
|
Loading…
Reference in New Issue