Merge pull request #722 from gpodder/yt-auoumous

Youtube fixes and improvements:
 - restore download functionality
 - add adaptive formats to video preferences; fail if no selected format is available
 - add a "Download with Youtube-DL" context-menu entry
 - get the episode duration
This commit is contained in:
Eric Le Lay 2020-01-07 19:20:01 +01:00 committed by GitHub
commit a66e70cf20
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 164 additions and 92 deletions

View file

@ -68,11 +68,12 @@ class YoutubeCustomDownload(download.CustomDownload):
Actual youtube-dl interaction via gPodderYoutubeDL.
"""
def __init__(self, ytdl, url):
def __init__(self, ytdl, url, episode):
self._ytdl = ytdl
self._url = url
self._reporthook = None
self._prev_dl_bytes = 0
self._episode = episode
def retrieve_resume(self, tempname, reporthook=None):
"""
@ -80,6 +81,8 @@ class YoutubeCustomDownload(download.CustomDownload):
"""
self._reporthook = reporthook
res = self._ytdl.fetch_video(self._url, tempname, self._my_hook)
if 'duration' in res and res['duration']:
self._episode.total_time = res['duration']
headers = {}
# youtube-dl doesn't return a content-type but an extension
if 'ext' in res:
@ -375,15 +378,16 @@ class gPodderYoutubeDL(download.CustomDownloader):
called from registry.custom_downloader.resolve
"""
if re.match(r'''https://www.youtube.com/watch\?v=.+''', episode.url):
return YoutubeCustomDownload(self, episode.url)
return YoutubeCustomDownload(self, episode.url, episode)
elif re.match(r'''https://www.youtube.com/watch\?v=.+''', episode.link):
return YoutubeCustomDownload(self, episode.link)
return YoutubeCustomDownload(self, episode.link, episode)
return None
class gPodderExtension:
def __init__(self, container):
self.container = container
self.ytdl = None
def on_load(self):
self.ytdl = gPodderYoutubeDL(self.container.manager.core.config)
@ -403,3 +407,16 @@ class gPodderExtension:
registry.custom_downloader.unregister(self.ytdl.custom_downloader)
except ValueError:
pass
self.ytdl = None
def on_ui_object_available(self, name, ui_object):
    """
    Remember the UI object announced under the name 'gpodder-gtk'.

    :param str name: identifier of the UI object being announced
    :param ui_object: the announced object; stored as ``self.gpodder``
        when ``name`` is 'gpodder-gtk', ignored otherwise
    """
    if name != 'gpodder-gtk':
        return
    self.gpodder = ui_object
def on_episodes_context_menu(self, episodes):
    """
    Build the extra context-menu entries for the given episodes.

    An entry is returned only when ``self.container.config.manage_downloads``
    is false and at least one episode reports
    ``was_downloaded(and_exists=True)`` as false.

    :param episodes: iterable of episode objects
    :return: a list with one ``(label, callback)`` tuple, or None
    """
    if self.container.config.manage_downloads:
        return None
    if all(e.was_downloaded(and_exists=True) for e in episodes):
        # every episode is already downloaded: nothing to offer
        return None
    return [(_("Download with Youtube-DL"), self.download_episodes)]
def download_episodes(self, episodes):
    """
    Context-menu callback: queue the episodes for download, passing
    ``self.ytdl`` as the downloader.

    :param episodes: episodes handed to ``self.gpodder.download_episode_list``
    """
    main_window = self.gpodder
    main_window.download_episode_list(episodes, downloader=self.ytdl)

View file

@ -51,7 +51,7 @@ _ = gpodder.gettext
class CustomDownload:
""" abstract class for custom downloads. DownloadTask call retrieve_resume() on it """
def retrieve_resume(self, unused_tempname, reporthook):
def retrieve_resume(self, tempname, reporthook):
"""
:param str tempname: temporary filename for the download
:param func(number, number, number) reporthook: callback for download progress (count, blockSize, totalSize)
@ -377,6 +377,67 @@ class DownloadURLOpener(urllib.request.FancyURLopener):
return (None, None)
class DefaultDownload(CustomDownload):
    """
    Download a URL through DownloadURLOpener, retrying on truncated content,
    socket timeouts and a fixed set of HTTP status codes.
    """

    def __init__(self, config, episode, url):
        """
        :param config: configuration object; ``config.auto.retries`` is read
        :param episode: episode whose ``channel`` is given to DownloadURLOpener
        :param str url: the URL to download
        """
        self._config = config
        self.__episode = episode
        self._url = url

    def retrieve_resume(self, tempname, reporthook):
        """
        :param str tempname: temporary filename for the download
        :param reporthook: progress callback, forwarded to the opener
        :return: the ``(headers, real_url)`` pair produced by
            ``DownloadURLOpener.retrieve_resume``
        :raises: the last ContentTooShortError, socket.timeout or
            gPodderDownloadHTTPError once no retry is left; an HTTP error
            whose code is not retryable is re-raised immediately
        """
        url = self._url
        logger.info("Downloading %s", url)
        opener = DownloadURLOpener(self.__episode.channel)

        # HTTP Status codes for which we retry the download
        retry_codes = (408, 418, 504, 598, 599)
        # a negative setting is treated as "no retries"
        max_retries = max(0, self._config.auto.retries)

        # Retry the download on timeout (bug 1013)
        for attempt in range(max_retries + 1):
            if attempt > 0:
                logger.info('Retrying download of %s (%d)', url, attempt)
                time.sleep(1)

            try:
                headers, real_url = opener.retrieve_resume(
                    url, tempname, reporthook=reporthook)
                # If we arrive here, the download was successful
                return (headers, real_url)
            except urllib.error.ContentTooShortError:
                if attempt >= max_retries:
                    raise
                logger.info('Content too short: %s - will retry.',
                            url)
            except socket.timeout:
                if attempt >= max_retries:
                    raise
                logger.info('Socket timeout: %s - will retry.', url)
            except gPodderDownloadHTTPError as http_error:
                if attempt >= max_retries or http_error.error_code not in retry_codes:
                    raise
                logger.info('HTTP error %d: %s - will retry.',
                            http_error.error_code, url)
class DefaultDownloader(CustomDownloader):
    """Downloader that builds a DefaultDownload for an episode."""

    @staticmethod
    def custom_downloader(config, episode):
        """
        Resolve the episode's download URL and wrap it in a DefaultDownload.

        :param config: passed to the URL resolvers and to DefaultDownload
        :param episode: the episode to download; ``episode.url`` is used
            unless a registered resolver returns a truthy replacement
        :return: a DefaultDownload instance
        """
        url = episode.url
        # Resolve URL and start downloading the episode
        resolved = registry.download_url.resolve(config, None, episode)
        url = resolved or url
        if url == episode.url:
            # don't modify custom urls (#635 - vimeo breaks if * is unescaped)
            url = util.iri_to_url(url.strip())
        return DefaultDownload(config, episode, url)
class DownloadQueueWorker(object):
def __init__(self, queue, exit_callback, continue_check_callback):
self.queue = queue
@ -602,6 +663,15 @@ class DownloadTask(object):
episode = property(fget=__get_episode)
def __get_downloader(self):
    """Return the custom downloader set for this task (may be None)."""
    return self.__downloader

def __set_downloader(self, value):
    """Replace the custom downloader used by this task."""
    # modifying the downloader will only have effect before the download is started
    self.__downloader = value

downloader = property(__get_downloader, __set_downloader)
def cancel(self):
if self.status in (self.DOWNLOADING, self.QUEUED):
self.status = self.CANCELLED
@ -610,13 +680,15 @@ class DownloadTask(object):
if self.status != self.DONE:
util.delete_file(self.tempname)
def __init__(self, episode, config):
def __init__(self, episode, config, downloader=None):
assert episode.download_task is None
self.__status = DownloadTask.INIT
self.__activity = DownloadTask.ACTIVITY_DOWNLOAD
self.__status_changed = True
self.__episode = episode
self._config = config
# specify a custom downloader to be used for this download
self.__downloader = downloader
# Create the target filename and save it in the database
self.filename = self.__episode.local_filename(create=True)
@ -776,60 +848,19 @@ class DownloadTask(object):
if not self.episode.download_task:
self.episode.download_task = self
url = self.__episode.url
try:
custom_downloader = registry.custom_downloader.resolve(self._config, None, self.episode)
url = self.__episode.url
if custom_downloader:
logger.info('Downloading %s with %s', url, custom_downloader)
headers, real_url = custom_downloader.retrieve_resume(
self.tempname, reporthook=self.status_updated)
if self.downloader:
downloader = self.downloader.custom_downloader(self._config, self.episode)
else:
# Resolve URL and start downloading the episode
res = registry.download_url.resolve(self._config, None, self.episode)
if res:
url = res
if url == self.__episode.url:
# don't modify custom urls (#635 - vimeo breaks if * is unescaped)
url = url.strip()
url = util.iri_to_url(url)
downloader = registry.custom_downloader.resolve(self._config, None, self.episode)
logger.info("Downloading %s", url)
downloader = DownloadURLOpener(self.__episode.channel)
if downloader:
logger.info('Downloading %s with %s', url, downloader)
else:
downloader = DefaultDownloader.custom_downloader(self._config, self.episode)
# HTTP Status codes for which we retry the download
retry_codes = (408, 418, 504, 598, 599)
max_retries = max(0, self._config.auto.retries)
# Retry the download on timeout (bug 1013)
for retry in range(max_retries + 1):
if retry > 0:
logger.info('Retrying download of %s (%d)', url, retry)
time.sleep(1)
try:
headers, real_url = downloader.retrieve_resume(url,
self.tempname, reporthook=self.status_updated)
# If we arrive here, the download was successful
break
except urllib.error.ContentTooShortError as ctse:
if retry < max_retries:
logger.info('Content too short: %s - will retry.',
url)
continue
raise
except socket.timeout as tmout:
if retry < max_retries:
logger.info('Socket timeout: %s - will retry.', url)
continue
raise
except gPodderDownloadHTTPError as http:
if retry < max_retries and http.error_code in retry_codes:
logger.info('HTTP error %d: %s - will retry.',
http.error_code, url)
continue
raise
headers, real_url = downloader.retrieve_resume(self.tempname, self.status_updated)
new_mimetype = headers.get('content-type', self.__episode.mime_type)
old_mimetype = self.__episode.mime_type

View file

@ -2918,7 +2918,7 @@ class gPodder(BuilderWidget, dbus.service.Object):
def download_episode_list_paused(self, episodes):
    """
    Call ``download_episode_list`` for the episodes with ``add_paused`` set.

    :param episodes: episodes forwarded to ``download_episode_list``
    """
    self.download_episode_list(episodes, add_paused=True)
def download_episode_list(self, episodes, add_paused=False, force_start=False):
def download_episode_list(self, episodes, add_paused=False, force_start=False, downloader=None):
def queue_tasks(tasks, queued_existing_task):
for task in tasks:
if add_paused:
@ -2950,6 +2950,9 @@ class gPodder(BuilderWidget, dbus.service.Object):
if episode.url == task.url:
task_exists = True
if task.status not in (task.DOWNLOADING, task.QUEUED):
if downloader:
# replace existing task's download with forced one
task.downloader = downloader
if force_start:
self.download_queue_manager.force_start_task(task)
else:
@ -2961,7 +2964,7 @@ class gPodder(BuilderWidget, dbus.service.Object):
continue
try:
task = download.DownloadTask(episode, self.config)
task = download.DownloadTask(episode, self.config, downloader=downloader)
except Exception as e:
d = {'episode': html.escape(episode.title), 'message': html.escape(str(e))}
message = _('Download error while downloading %(episode)s: %(message)s')
@ -3386,8 +3389,6 @@ class gPodder(BuilderWidget, dbus.service.Object):
def on_download_selected_episodes(self, action_or_widget, param=None):
episodes = self.get_selected_episodes()
self.download_episode_list(episodes)
self.update_episode_list_icons([episode.url for episode in episodes])
self.play_or_download()
def on_treeAvailable_row_activated(self, widget, path, view_column):
"""Double-click/enter action handler for treeAvailable"""

View file

@ -44,16 +44,16 @@ formats = [
# Fallback to an MP4 version of same quality.
# Try 34 (FLV 360p H.264 AAC) if 18 (MP4 360p) fails.
# Fallback to 6 or 5 (FLV Sorenson H.263 MP3) if all fails.
(46, ([46, 37, 45, 22, 44, 35, 43, 18, 6, 34, 5],
(46, ([46, 37, 45, 22, '136+140', 44, 35, 43, 18, '134+140', 6, 34, 5],
'45/1280x720/99/0/0',
'WebM 1080p (1920x1080)')), # N/A, 192 kbps
(45, ([45, 22, 44, 35, 43, 18, 6, 34, 5],
(45, ([45, 22, '136+140', 44, 35, 43, 18, '134+140', 6, 34, 5],
'45/1280x720/99/0/0',
'WebM 720p (1280x720)')), # 2.0 Mbps, 192 kbps
(44, ([44, 35, 43, 18, 6, 34, 5],
(44, ([44, 35, 43, 18, '134+140', 6, 34, 5],
'44/854x480/99/0/0',
'WebM 480p (854x480)')), # 1.0 Mbps, 128 kbps
(43, ([43, 18, 6, 34, 5],
(43, ([43, 18, '134+140', 6, 34, 5],
'43/640x360/99/0/0',
'WebM 360p (640x360)')), # 0.5 Mbps, 128 kbps
@ -61,16 +61,16 @@ formats = [
# Try 35 (FLV 480p H.264 AAC) between 720p and 360p because there's no MP4 480p.
# Try 34 (FLV 360p H.264 AAC) if 18 (MP4 360p) fails.
# Fallback to 6 or 5 (FLV Sorenson H.263 MP3) if all fails.
(38, ([38, 37, 22, 35, 18, 34, 6, 5],
(38, ([38, 37, 22, '136+140', 35, 18, '134+140', 34, 6, 5],
'38/1920x1080/9/0/115',
'MP4 4K 3072p (4096x3072)')), # 5.0 - 3.5 Mbps, 192 kbps
(37, ([37, 22, 35, 18, 34, 6, 5],
(37, ([37, 22, '136+140', 35, 18, '134+140', 34, 6, 5],
'37/1920x1080/9/0/115',
'MP4 HD 1080p (1920x1080)')), # 4.3 - 3.0 Mbps, 192 kbps
(22, ([22, 35, 18, 34, 6, 5],
(22, ([22, '136+140', 35, 18, '134+140', 34, 6, 5],
'22/1280x720/9/0/115',
'MP4 HD 720p (1280x720)')), # 2.9 - 2.0 Mbps, 192 kbps
(18, ([18, 34, 6, 5],
(18, ([18, '134+140', 34, 6, 5],
'18/640x360/9/0/115',
'MP4 360p (640x360)')), # 0.5 Mbps, 96 kbps
@ -117,7 +117,9 @@ def get_fmt_ids(youtube_config):
@registry.download_url.register
def youtube_real_download_url(config, episode):
fmt_ids = get_fmt_ids(config.youtube) if config else None
res = get_real_download_url(episode.url, fmt_ids)
res, duration = get_real_download_url(episode.url, fmt_ids)
if duration is not None:
episode.total_time = int(int(duration) / 1000)
return None if res == episode.url else res
@ -125,6 +127,8 @@ def get_real_download_url(url, preferred_fmt_ids=None):
if not preferred_fmt_ids:
preferred_fmt_ids, _, _ = formats_dict[22] # MP4 720p
duration = None
vid = get_youtube_id(url)
if vid is not None:
page = None
@ -142,42 +146,59 @@ def get_real_download_url(url, preferred_fmt_ids=None):
# (http://forum.videohelp.com/topic336882-1800.html#1912972)
def find_urls(page):
# streamingData is preferable to url_encoded_fmt_stream_map
# streamingData.formats are the same as url_encoded_fmt_stream_map
# streamingData.adaptiveFormats are audio-only and video-only formats
x = parse_qs(page)
if 'reason' in x:
error_message = util.remove_html_tags(x['reason'][0])
elif 'player_response' in x:
player_response = json.loads(x['player_response'][0])
if 'reason' in player_response['playabilityStatus']:
error_message = util.remove_html_tags(player_response['playabilityStatus']['reason'])
elif 'live_playback' in x:
error_message = 'live stream'
elif 'post_live_playback' in x:
error_message = 'post live stream'
elif 'streamingData' in player_response:
# DRM videos store url inside a cipher key - not supported
if 'formats' in player_response['streamingData']:
for f in player_response['streamingData']['formats']:
if 'url' in f:
yield int(f['itag']), [f['url'], f.get('approxDurationMs')]
if 'adaptiveFormats' in player_response['streamingData']:
for f in player_response['streamingData']['adaptiveFormats']:
if 'url' in f:
yield int(f['itag']), [f['url'], f.get('approxDurationMs')]
return
if error_message:
raise YouTubeError('Cannot download video: %s' % error_message)
r4 = re.search('url_encoded_fmt_stream_map=([^&]+)', page)
if r4 is not None:
fmt_url_map = urllib.parse.unquote(r4.group(1))
for fmt_url_encoded in fmt_url_map.split(','):
video_info = parse_qs(fmt_url_encoded)
yield int(video_info['itag'][0]), video_info['url'][0]
else:
error_info = parse_qs(page)
if 'reason' in error_info:
error_message = util.remove_html_tags(error_info['reason'][0])
elif 'player_response' in error_info:
player_response = json.loads(error_info['player_response'][0])
if 'reason' in player_response['playabilityStatus']:
error_message = util.remove_html_tags(player_response['playabilityStatus']['reason'])
elif 'live_playback' in error_info:
error_message = 'live stream'
elif 'post_live_playback' in error_info:
error_message = 'post live stream'
else:
error_message = ''
else:
error_message = ''
raise YouTubeError('Cannot download video: %s' % error_message)
yield int(video_info['itag'][0]), [video_info['url'][0], None]
fmt_id_url_map = sorted(find_urls(page), reverse=True)
if not fmt_id_url_map:
raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)
# Default to the highest fmt_id if we don't find a match below
_, url = fmt_id_url_map[0]
drm = re.search('%22cipher%22%3A', page)
if drm is not None:
raise YouTubeError('Unsupported DRM content found for video ID "%s"' % vid)
raise YouTubeError('No formats found for video ID "%s"' % vid)
formats_available = set(fmt_id for fmt_id, url in fmt_id_url_map)
fmt_id_url_map = dict(fmt_id_url_map)
for id in preferred_fmt_ids:
if re.search('\+', str(id)):
# skip formats that contain a + (136+140)
continue
id = int(id)
if id in formats_available:
format = formats_dict.get(id)
@ -188,10 +209,12 @@ def get_real_download_url(url, preferred_fmt_ids=None):
logger.info('Found YouTube format: %s (fmt_id=%d)',
description, id)
url = fmt_id_url_map[id]
url, duration = fmt_id_url_map[id]
break
else:
raise YouTubeError('No preferred formats found for video ID "%s"' % vid)
return url
return url, duration
def get_youtube_id(url):