Merge pull request #1246 from auouymous/ondemand-html-description

Generate HTML episode description only when needed.
This commit is contained in:
Eric Le Lay 2022-06-20 13:25:00 +02:00 committed by GitHub
commit b1bd429be0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 63 additions and 29 deletions

View File

@ -261,7 +261,7 @@ class gPodderExtension:
else:
info['title'] = title
info['subtitle'] = episode.description
info['subtitle'] = episode._text_description
if self.container.config.genre_tag is not None:
info['genre'] = self.container.config.genre_tag

View File

@ -211,8 +211,6 @@ class YoutubeFeed(model.Feed):
episodes = []
for en in self._ie_result['entries']:
guid = video_guid(en['id'])
description = util.remove_html_tags(en.get('description') or _('No description available'))
html_description = util.nice_html_description(en.get('thumbnail'), description)
if en.get('ext'):
mime_type = util.mimetype_from_extension('.{}'.format(en['ext']))
else:
@ -225,8 +223,9 @@ class YoutubeFeed(model.Feed):
ep = {
'title': en.get('title', guid),
'link': en.get('webpage_url'),
'description': description,
'description_html': html_description,
'episode_art_url': en.get('thumbnail'),
'description': util.remove_html_tags(en.get('description') or ''),
'description_html': '',
'url': en.get('webpage_url'),
'file_size': filesize,
'mime_type': mime_type,

View File

@ -113,7 +113,7 @@ class DBusPodcastsProxy(dbus.service.Object):
def episode_to_tuple(episode):
title = safe_str(episode.title)
url = safe_str(episode.url)
description = safe_first_line(episode.description)
description = safe_first_line(episode._text_description)
filename = safe_str(episode.download_filename)
file_type = safe_str(episode.file_type())
is_new = (episode.state == gpodder.STATE_NORMAL and episode.is_new)

View File

@ -219,7 +219,7 @@ class gPodderShownotesText(gPodderShownotes):
self.text_buffer.insert_at_cursor('\n')
self.text_buffer.insert_with_tags_by_name(self.text_buffer.get_end_iter(), details, 'details')
self.text_buffer.insert_at_cursor('\n\n')
for target, text in util.extract_hyperlinked_text(episode.description_html or episode.description):
for target, text in util.extract_hyperlinked_text(episode.html_description()):
hyperlinks.append((self.text_buffer.get_char_count(), target))
if target:
self.text_buffer.insert_with_tags_by_name(
@ -349,13 +349,10 @@ class gPodderShownotesHTML(gPodderShownotes):
'duration': episode.get_play_info_string()})
header_html = _('<div id="gpodder-title">\n%(heading)s\n<p>%(subheading)s</p>\n<p>%(details)s</p></div>\n') \
% dict(heading=heading, subheading=subheading, details=details)
description_html = episode.description_html
if not description_html:
description_html = re.sub(r'\n', '<br>\n', episode.description)
# uncomment to prevent background override in html shownotes
# self.manager.remove_all_style_sheets ()
logger.debug("base uri: %s (chan:%s)", self._base_uri, episode.channel.url)
self.html_view.load_html(header_html + description_html, self._base_uri)
self.html_view.load_html(header_html + episode.html_description(), self._base_uri)
# uncomment to show web inspector
# self.html_view.get_inspector().show()
self.episode = episode

View File

@ -28,6 +28,7 @@ import collections
import datetime
import glob
import hashlib
import json
import logging
import os
import re
@ -175,12 +176,14 @@ class PodcastParserFeed(Feed):
existing_episode.total_time = youtube.get_total_time(episode)
existing_episode.update_from(episode)
existing_episode.cache_text_description()
existing_episode.save()
continue
elif episode.total_time == 0 and 'youtube' in episode.url:
# query duration for new youtube episodes
episode.total_time = youtube.get_total_time(episode)
episode.cache_text_description()
episode.save()
new_episodes.append(episode)
return new_episodes, seen_guids
@ -271,7 +274,7 @@ class PodcastEpisode(PodcastModelObject):
MAX_FILENAME_LENGTH = 120 # without extension
MAX_FILENAME_WITH_EXT_LENGTH = 140 - len(".partial.webm") # with extension
__slots__ = schema.EpisodeColumns + ('_download_error',)
__slots__ = schema.EpisodeColumns + ('_download_error', '_text_description',)
# Unconditionally raises a generic Exception carrying the message
# 'Property is deprecated!'.
# NOTE(review): presumably used as the accessor for retired properties; the
# call sites are not visible in this hunk -- confirm before relying on it.
def _deprecated(self):
raise Exception('Property is deprecated!')
@ -289,17 +292,20 @@ class PodcastEpisode(PodcastModelObject):
episode.guid = entry['guid']
episode.title = entry['title']
episode.link = entry['link']
episode.description = entry['description']
episode.episode_art_url = entry.get('episode_art_url')
if entry.get('description_html'):
episode.description = ''
episode.description_html = entry['description_html']
else:
thumbnail = entry.get('episode_art_url')
description = util.remove_html_tags(episode.description or _('No description available'))
episode.description_html = util.nice_html_description(thumbnail, description)
episode.description = util.remove_html_tags(entry['description'] or '')
episode.description_html = ''
episode.total_time = entry['total_time']
episode.published = entry['published']
episode.payment_url = entry['payment_url']
episode.chapters = None
if entry.get("chapters"):
episode.chapters = json.dumps(entry["chapters"])
audio_available = any(enclosure['mime_type'].startswith('audio/') for enclosure in entry['enclosures'])
video_available = any(enclosure['mime_type'].startswith('video/') for enclosure in entry['enclosures'])
@ -368,8 +374,10 @@ class PodcastEpisode(PodcastModelObject):
self.file_size = 0
self.mime_type = 'application/octet-stream'
self.guid = ''
self.episode_art_url = None
self.description = ''
self.description_html = ''
self.chapters = None
self.link = ''
self.published = 0
self.download_filename = None
@ -388,6 +396,7 @@ class PodcastEpisode(PodcastModelObject):
self.last_playback = 0
self._download_error = None
self._text_description = ''
@property
def channel(self):
@ -572,9 +581,21 @@ class PodcastEpisode(PodcastModelObject):
age_prop = property(fget=get_age_string)
# Populate self._text_description, the cached text form of the description.
# Order of preference:
#   1. self.description, used as-is when it is non-empty;
#   2. self.description_html passed through util.remove_html_tags();
#   3. the empty string when neither field is set.
# Returns nothing; the only effect is the assignment to self._text_description.
def cache_text_description(self):
if self.description:
self._text_description = self.description
elif self.description_html:
# Only the HTML form is available, so derive the text from it.
self._text_description = util.remove_html_tags(self.description_html)
else:
# Neither description field holds a value.
self._text_description = ''
# Return the HTML description for this episode.
# self.description_html is returned directly when it is non-empty. Otherwise
# the HTML is built on demand by util.nice_html_description(), which receives
# self.episode_art_url and self.description -- or the _()-wrapped
# 'No description available' placeholder when self.description is empty.
# Nothing is stored on the instance; the fallback is recomputed on every call.
def html_description(self):
return self.description_html \
or util.nice_html_description(self.episode_art_url, self.description or _('No description available'))
def one_line_description(self):
MAX_LINE_LENGTH = 120
desc = util.remove_html_tags(self.description or '')
desc = self._text_description
desc = re.sub(r'\s+', ' ', desc).strip()
if not desc:
return _('No description available')
@ -864,7 +885,8 @@ class PodcastEpisode(PodcastModelObject):
return '-'
def update_from(self, episode):
for k in ('title', 'url', 'description', 'description_html', 'link', 'published', 'guid', 'payment_url'):
for k in ('title', 'url', 'episode_art_url', 'description', 'description_html', 'chapters', 'link',
'published', 'guid', 'payment_url'):
setattr(self, k, getattr(episode, k))
# Don't overwrite file size on downloaded episodes
# See #648 refreshing a youtube podcast clears downloaded file size
@ -1101,7 +1123,9 @@ class PodcastChannel(PodcastModelObject):
Returns: A new PodcastEpisode object
"""
return self.EpisodeClass.create_from_dict(d, self)
episode = self.EpisodeClass.create_from_dict(d, self)
episode.cache_text_description()
return episode
def _consume_updated_title(self, new_title):
# Replace multi-space and newlines with single space (Maemo bug 11173)

View File

@ -167,7 +167,8 @@ class SoundcloudUser(object):
yield {
'title': track.get('title', track.get('permalink')) or _('Unknown track'),
'link': track.get('permalink_url') or 'https://soundcloud.com/' + self.username,
'description': track.get('description') or _('No description available'),
'description': util.remove_html_tags(track.get('description') or ''),
'description_html': '',
'url': url,
'file_size': int(filesize),
'mime_type': filetype,

View File

@ -46,7 +46,7 @@ class Matcher(object):
return (needle in haystack)
if needle in self._episode.title:
return True
return (needle in self._episode.description)
return (needle in self._episode._text_description)
# case-insensitive search in haystack, or both title and description if no haystack
def s(needle, haystack=None):
@ -55,7 +55,7 @@ class Matcher(object):
return (needle in haystack.casefold())
if needle in self._episode.title.casefold():
return True
return (needle in self._episode.description.casefold())
return (needle in self._episode._text_description.casefold())
# case-sensitive regular expression search in haystack, or both title and description if no haystack
def R(needle, haystack=None):
@ -64,7 +64,7 @@ class Matcher(object):
return regexp.search(haystack)
if regexp.search(self._episode.title):
return True
return regexp.search(self._episode.description)
return regexp.search(self._episode._text_description)
# case-insensitive regular expression search in haystack, or both title and description if no haystack
def r(needle, haystack=None):
@ -73,7 +73,7 @@ class Matcher(object):
return regexp.search(haystack)
if regexp.search(self._episode.title):
return True
return regexp.search(self._episode.description)
return regexp.search(self._episode._text_description)
return bool(eval(term, {'__builtins__': None, 'S': S, 's': s, 'R': R, 'r': r}, self))
except Exception as e:
@ -108,7 +108,7 @@ class Matcher(object):
elif k == 'title':
return episode.title
elif k == 'description':
return episode.description
return episode._text_description
elif k == 'since':
return (datetime.datetime.now() - datetime.datetime.fromtimestamp(episode.published)).days
elif k == 'age':
@ -215,7 +215,7 @@ class EQL(object):
if self._regex:
return re.search(self._query, episode.title, self._flags) is not None
elif self._string:
return self._query in episode.title.lower() or self._query in episode.description.lower()
return self._query in episode.title.lower() or self._query in episode._text_description.lower()
return Matcher(episode).match(self._query)

View File

@ -50,6 +50,8 @@ EpisodeColumns = (
'last_playback',
'payment_url',
'description_html',
'episode_art_url',
'chapters',
)
PodcastColumns = (
@ -72,7 +74,7 @@ PodcastColumns = (
'cover_thumb',
)
CURRENT_VERSION = 7
CURRENT_VERSION = 8
# SQL commands to upgrade old database versions to new ones
@ -114,6 +116,13 @@ UPGRADE_SQL = [
UPDATE episode SET description=remove_html_tags(description_html) WHERE is_html(description)
UPDATE podcast SET http_last_modified=NULL, http_etag=NULL
"""),
# Version 8: Add episode thumbnail URL and chapters
(7, 8, """
ALTER TABLE episode ADD COLUMN episode_art_url TEXT NULL DEFAULT NULL
ALTER TABLE episode ADD COLUMN chapters TEXT NULL DEFAULT NULL
UPDATE podcast SET http_last_modified=NULL, http_etag=NULL
"""),
]
@ -172,7 +181,9 @@ def initialize_database(db):
current_position_updated INTEGER NOT NULL DEFAULT 0,
last_playback INTEGER NOT NULL DEFAULT 0,
payment_url TEXT NULL DEFAULT NULL,
description_html TEXT NOT NULL DEFAULT ''
description_html TEXT NOT NULL DEFAULT '',
episode_art_url TEXT NULL DEFAULT NULL,
chapters TEXT NULL DEFAULT NULL
)
""")
@ -299,6 +310,8 @@ def convert_gpodder2_db(old_db, new_db):
0,
None,
'',
None,
None,
)
new_db.execute("""
INSERT INTO episode VALUES (%s)

View File

@ -396,7 +396,7 @@ class iPodDevice(Device):
return False
track = self.ipod.add_track(local_filename, episode.title, episode.channel.title,
util.remove_html_tags(episode.description), episode.url, episode.channel.url,
episode._text_description, episode.url, episode.channel.url,
episode.published, get_track_length(local_filename), episode.file_type() == 'audio')
self.update_from_episode(track, episode, initial=True)