Merge pull request #722 from gpodder/yt-auoumous
YouTube fixes and improvements:
- restore download functionality
- add adaptive formats to the video preferences; fail if no selected format is available
- add a "Download with Youtube-DL" context-menu entry
- get the episode duration
This commit is contained in:
commit
a66e70cf20
4 changed files with 164 additions and 92 deletions
|
@ -68,11 +68,12 @@ class YoutubeCustomDownload(download.CustomDownload):
|
|||
|
||||
Actual youtube-dl interaction via gPodderYoutubeDL.
|
||||
"""
|
||||
def __init__(self, ytdl, url):
|
||||
def __init__(self, ytdl, url, episode):
|
||||
self._ytdl = ytdl
|
||||
self._url = url
|
||||
self._reporthook = None
|
||||
self._prev_dl_bytes = 0
|
||||
self._episode = episode
|
||||
|
||||
def retrieve_resume(self, tempname, reporthook=None):
|
||||
"""
|
||||
|
@ -80,6 +81,8 @@ class YoutubeCustomDownload(download.CustomDownload):
|
|||
"""
|
||||
self._reporthook = reporthook
|
||||
res = self._ytdl.fetch_video(self._url, tempname, self._my_hook)
|
||||
if 'duration' in res and res['duration']:
|
||||
self._episode.total_time = res['duration']
|
||||
headers = {}
|
||||
# youtube-dl doesn't return a content-type but an extension
|
||||
if 'ext' in res:
|
||||
|
@ -375,15 +378,16 @@ class gPodderYoutubeDL(download.CustomDownloader):
|
|||
called from registry.custom_downloader.resolve
|
||||
"""
|
||||
if re.match(r'''https://www.youtube.com/watch\?v=.+''', episode.url):
|
||||
return YoutubeCustomDownload(self, episode.url)
|
||||
return YoutubeCustomDownload(self, episode.url, episode)
|
||||
elif re.match(r'''https://www.youtube.com/watch\?v=.+''', episode.link):
|
||||
return YoutubeCustomDownload(self, episode.link)
|
||||
return YoutubeCustomDownload(self, episode.link, episode)
|
||||
return None
|
||||
|
||||
|
||||
class gPodderExtension:
|
||||
def __init__(self, container):
|
||||
self.container = container
|
||||
self.ytdl = None
|
||||
|
||||
def on_load(self):
|
||||
self.ytdl = gPodderYoutubeDL(self.container.manager.core.config)
|
||||
|
@ -403,3 +407,16 @@ class gPodderExtension:
|
|||
registry.custom_downloader.unregister(self.ytdl.custom_downloader)
|
||||
except ValueError:
|
||||
pass
|
||||
self.ytdl = None
|
||||
|
||||
def on_ui_object_available(self, name, ui_object):
    """Remember the UI object announced under the name 'gpodder-gtk'.

    Objects announced under any other name are ignored.

    :param str name: identifier of the UI object being announced
    :param ui_object: the announced object; stored as ``self.gpodder``
    """
    if name != 'gpodder-gtk':
        return
    self.gpodder = ui_object
|
||||
|
||||
def on_episodes_context_menu(self, episodes):
    """Offer a "Download with Youtube-DL" entry for the given episodes.

    No entry is offered when the ``manage_downloads`` option of this
    extension's config is set, nor when every episode reports
    ``was_downloaded(and_exists=True)``.

    :param episodes: episodes the context menu was opened for
    :return: a one-element list of (label, callback) tuples, or None
    """
    if self.container.config.manage_downloads:
        return None
    if all(e.was_downloaded(and_exists=True) for e in episodes):
        return None
    return [(_("Download with Youtube-DL"), self.download_episodes)]
|
||||
|
||||
def download_episodes(self, episodes):
    """Queue the episodes for download, forcing this extension's downloader.

    Relies on ``self.gpodder`` having been set by
    on_ui_object_available().

    :param episodes: episodes to hand to ``download_episode_list``
    """
    ui = self.gpodder
    ui.download_episode_list(episodes, downloader=self.ytdl)
|
||||
|
|
|
@ -51,7 +51,7 @@ _ = gpodder.gettext
|
|||
class CustomDownload:
|
||||
""" abstract class for custom downloads. DownloadTask call retrieve_resume() on it """
|
||||
|
||||
def retrieve_resume(self, unused_tempname, reporthook):
|
||||
def retrieve_resume(self, tempname, reporthook):
|
||||
"""
|
||||
:param str tempname: temporary filename for the download
|
||||
:param func(number, number, number) reporthook: callback for download progress (count, blockSize, totalSize)
|
||||
|
@ -377,6 +377,67 @@ class DownloadURLOpener(urllib.request.FancyURLopener):
|
|||
return (None, None)
|
||||
|
||||
|
||||
class DefaultDownload(CustomDownload):
    """Download an episode from a URL, retrying on transient failures.

    The transfer itself is delegated to DownloadURLOpener; this class only
    adds the retry policy around it.
    """

    def __init__(self, config, episode, url):
        """
        :param config: configuration object; ``config.auto.retries`` is read
        :param episode: episode being downloaded; its ``channel`` is handed
                        to DownloadURLOpener
        :param str url: URL to fetch
        """
        self._config = config
        # Single leading underscore, consistent with _config and _url
        self._episode = episode
        self._url = url

    def retrieve_resume(self, tempname, reporthook):
        """
        Fetch the URL into tempname, retrying up to ``config.auto.retries``
        times (negative values count as 0) with a one second pause before
        each retry.

        :param str tempname: temporary filename for the download
        :param func(number, number, number) reporthook: callback for download progress (count, blockSize, totalSize)
        :return: the (headers, real_url) pair obtained from
                 DownloadURLOpener.retrieve_resume
        :raises urllib.error.ContentTooShortError: no retries left
        :raises socket.timeout: no retries left
        :raises gPodderDownloadHTTPError: no retries left, or the HTTP
                status is not one of the retried codes
        """
        url = self._url
        logger.info("Downloading %s", url)
        downloader = DownloadURLOpener(self._episode.channel)

        # HTTP Status codes for which we retry the download
        retry_codes = (408, 418, 504, 598, 599)
        max_retries = max(0, self._config.auto.retries)

        # Retry the download on timeout (bug 1013)
        for retry in range(max_retries + 1):
            if retry > 0:
                logger.info('Retrying download of %s (%d)', url, retry)
                time.sleep(1)

            try:
                headers, real_url = downloader.retrieve_resume(url,
                    tempname, reporthook=reporthook)
                # If we arrive here, the download was successful
                break
            except urllib.error.ContentTooShortError:
                if retry < max_retries:
                    logger.info('Content too short: %s - will retry.',
                            url)
                    continue
                raise
            except socket.timeout:
                if retry < max_retries:
                    logger.info('Socket timeout: %s - will retry.', url)
                    continue
                raise
            except gPodderDownloadHTTPError as http:
                if retry < max_retries and http.error_code in retry_codes:
                    logger.info('HTTP error %d: %s - will retry.',
                            http.error_code, url)
                    continue
                raise

        # The loop runs at least once and only leaves via break or raise,
        # so both names are bound here.
        return (headers, real_url)
|
||||
|
||||
|
||||
class DefaultDownloader(CustomDownloader):
    """Downloader that builds a DefaultDownload for an episode."""

    @staticmethod
    def custom_downloader(config, episode):
        """
        Work out the URL to download for the episode and wrap it in a
        DefaultDownload.

        :param config: configuration passed to the URL resolvers and to
                       the DefaultDownload
        :param episode: episode to download
        :return: a DefaultDownload for the resolved (or original) URL
        """
        feed_url = episode.url
        # Resolve URL and start downloading the episode
        resolved = registry.download_url.resolve(config, None, episode)
        url = resolved or feed_url
        if url == episode.url:
            # don't modify custom urls (#635 - vimeo breaks if * is unescaped)
            url = util.iri_to_url(url.strip())
        return DefaultDownload(config, episode, url)
|
||||
|
||||
|
||||
class DownloadQueueWorker(object):
|
||||
def __init__(self, queue, exit_callback, continue_check_callback):
|
||||
self.queue = queue
|
||||
|
@ -602,6 +663,15 @@ class DownloadTask(object):
|
|||
|
||||
episode = property(fget=__get_episode)
|
||||
|
||||
def __get_downloader(self):
    """Return the custom downloader assigned to this task."""
    return self.__downloader

def __set_downloader(self, downloader):
    """Assign the custom downloader to use for this task.

    Modifying the downloader will only have effect before the download
    is started.
    """
    self.__downloader = downloader

downloader = property(__get_downloader, __set_downloader)
|
||||
|
||||
def cancel(self):
|
||||
if self.status in (self.DOWNLOADING, self.QUEUED):
|
||||
self.status = self.CANCELLED
|
||||
|
@ -610,13 +680,15 @@ class DownloadTask(object):
|
|||
if self.status != self.DONE:
|
||||
util.delete_file(self.tempname)
|
||||
|
||||
def __init__(self, episode, config):
|
||||
def __init__(self, episode, config, downloader=None):
|
||||
assert episode.download_task is None
|
||||
self.__status = DownloadTask.INIT
|
||||
self.__activity = DownloadTask.ACTIVITY_DOWNLOAD
|
||||
self.__status_changed = True
|
||||
self.__episode = episode
|
||||
self._config = config
|
||||
# specify a custom downloader to be used for this download
|
||||
self.__downloader = downloader
|
||||
|
||||
# Create the target filename and save it in the database
|
||||
self.filename = self.__episode.local_filename(create=True)
|
||||
|
@ -776,60 +848,19 @@ class DownloadTask(object):
|
|||
if not self.episode.download_task:
|
||||
self.episode.download_task = self
|
||||
|
||||
try:
|
||||
|
||||
custom_downloader = registry.custom_downloader.resolve(self._config, None, self.episode)
|
||||
|
||||
url = self.__episode.url
|
||||
if custom_downloader:
|
||||
logger.info('Downloading %s with %s', url, custom_downloader)
|
||||
headers, real_url = custom_downloader.retrieve_resume(
|
||||
self.tempname, reporthook=self.status_updated)
|
||||
else:
|
||||
# Resolve URL and start downloading the episode
|
||||
res = registry.download_url.resolve(self._config, None, self.episode)
|
||||
if res:
|
||||
url = res
|
||||
if url == self.__episode.url:
|
||||
# don't modify custom urls (#635 - vimeo breaks if * is unescaped)
|
||||
url = url.strip()
|
||||
url = util.iri_to_url(url)
|
||||
|
||||
logger.info("Downloading %s", url)
|
||||
downloader = DownloadURLOpener(self.__episode.channel)
|
||||
|
||||
# HTTP Status codes for which we retry the download
|
||||
retry_codes = (408, 418, 504, 598, 599)
|
||||
max_retries = max(0, self._config.auto.retries)
|
||||
|
||||
# Retry the download on timeout (bug 1013)
|
||||
for retry in range(max_retries + 1):
|
||||
if retry > 0:
|
||||
logger.info('Retrying download of %s (%d)', url, retry)
|
||||
time.sleep(1)
|
||||
|
||||
try:
|
||||
headers, real_url = downloader.retrieve_resume(url,
|
||||
self.tempname, reporthook=self.status_updated)
|
||||
# If we arrive here, the download was successful
|
||||
break
|
||||
except urllib.error.ContentTooShortError as ctse:
|
||||
if retry < max_retries:
|
||||
logger.info('Content too short: %s - will retry.',
|
||||
url)
|
||||
continue
|
||||
raise
|
||||
except socket.timeout as tmout:
|
||||
if retry < max_retries:
|
||||
logger.info('Socket timeout: %s - will retry.', url)
|
||||
continue
|
||||
raise
|
||||
except gPodderDownloadHTTPError as http:
|
||||
if retry < max_retries and http.error_code in retry_codes:
|
||||
logger.info('HTTP error %d: %s - will retry.',
|
||||
http.error_code, url)
|
||||
continue
|
||||
raise
|
||||
if self.downloader:
|
||||
downloader = self.downloader.custom_downloader(self._config, self.episode)
|
||||
else:
|
||||
downloader = registry.custom_downloader.resolve(self._config, None, self.episode)
|
||||
|
||||
if downloader:
|
||||
logger.info('Downloading %s with %s', url, downloader)
|
||||
else:
|
||||
downloader = DefaultDownloader.custom_downloader(self._config, self.episode)
|
||||
|
||||
headers, real_url = downloader.retrieve_resume(self.tempname, self.status_updated)
|
||||
|
||||
new_mimetype = headers.get('content-type', self.__episode.mime_type)
|
||||
old_mimetype = self.__episode.mime_type
|
||||
|
|
|
@ -2918,7 +2918,7 @@ class gPodder(BuilderWidget, dbus.service.Object):
|
|||
def download_episode_list_paused(self, episodes):
|
||||
self.download_episode_list(episodes, True)
|
||||
|
||||
def download_episode_list(self, episodes, add_paused=False, force_start=False):
|
||||
def download_episode_list(self, episodes, add_paused=False, force_start=False, downloader=None):
|
||||
def queue_tasks(tasks, queued_existing_task):
|
||||
for task in tasks:
|
||||
if add_paused:
|
||||
|
@ -2950,6 +2950,9 @@ class gPodder(BuilderWidget, dbus.service.Object):
|
|||
if episode.url == task.url:
|
||||
task_exists = True
|
||||
if task.status not in (task.DOWNLOADING, task.QUEUED):
|
||||
if downloader:
|
||||
# replace existing task's download with forced one
|
||||
task.downloader = downloader
|
||||
if force_start:
|
||||
self.download_queue_manager.force_start_task(task)
|
||||
else:
|
||||
|
@ -2961,7 +2964,7 @@ class gPodder(BuilderWidget, dbus.service.Object):
|
|||
continue
|
||||
|
||||
try:
|
||||
task = download.DownloadTask(episode, self.config)
|
||||
task = download.DownloadTask(episode, self.config, downloader=downloader)
|
||||
except Exception as e:
|
||||
d = {'episode': html.escape(episode.title), 'message': html.escape(str(e))}
|
||||
message = _('Download error while downloading %(episode)s: %(message)s')
|
||||
|
@ -3386,8 +3389,6 @@ class gPodder(BuilderWidget, dbus.service.Object):
|
|||
def on_download_selected_episodes(self, action_or_widget, param=None):
|
||||
episodes = self.get_selected_episodes()
|
||||
self.download_episode_list(episodes)
|
||||
self.update_episode_list_icons([episode.url for episode in episodes])
|
||||
self.play_or_download()
|
||||
|
||||
def on_treeAvailable_row_activated(self, widget, path, view_column):
|
||||
"""Double-click/enter action handler for treeAvailable"""
|
||||
|
|
|
@ -44,16 +44,16 @@ formats = [
|
|||
# Fallback to an MP4 version of same quality.
|
||||
# Try 34 (FLV 360p H.264 AAC) if 18 (MP4 360p) fails.
|
||||
# Fallback to 6 or 5 (FLV Sorenson H.263 MP3) if all fails.
|
||||
(46, ([46, 37, 45, 22, 44, 35, 43, 18, 6, 34, 5],
|
||||
(46, ([46, 37, 45, 22, '136+140', 44, 35, 43, 18, '134+140', 6, 34, 5],
|
||||
'45/1280x720/99/0/0',
|
||||
'WebM 1080p (1920x1080)')), # N/A, 192 kbps
|
||||
(45, ([45, 22, 44, 35, 43, 18, 6, 34, 5],
|
||||
(45, ([45, 22, '136+140', 44, 35, 43, 18, '134+140', 6, 34, 5],
|
||||
'45/1280x720/99/0/0',
|
||||
'WebM 720p (1280x720)')), # 2.0 Mbps, 192 kbps
|
||||
(44, ([44, 35, 43, 18, 6, 34, 5],
|
||||
(44, ([44, 35, 43, 18, '134+140', 6, 34, 5],
|
||||
'44/854x480/99/0/0',
|
||||
'WebM 480p (854x480)')), # 1.0 Mbps, 128 kbps
|
||||
(43, ([43, 18, 6, 34, 5],
|
||||
(43, ([43, 18, '134+140', 6, 34, 5],
|
||||
'43/640x360/99/0/0',
|
||||
'WebM 360p (640x360)')), # 0.5 Mbps, 128 kbps
|
||||
|
||||
|
@ -61,16 +61,16 @@ formats = [
|
|||
# Try 35 (FLV 480p H.264 AAC) between 720p and 360p because there's no MP4 480p.
|
||||
# Try 34 (FLV 360p H.264 AAC) if 18 (MP4 360p) fails.
|
||||
# Fallback to 6 or 5 (FLV Sorenson H.263 MP3) if all fails.
|
||||
(38, ([38, 37, 22, 35, 18, 34, 6, 5],
|
||||
(38, ([38, 37, 22, '136+140', 35, 18, '134+140', 34, 6, 5],
|
||||
'38/1920x1080/9/0/115',
|
||||
'MP4 4K 3072p (4096x3072)')), # 5.0 - 3.5 Mbps, 192 kbps
|
||||
(37, ([37, 22, 35, 18, 34, 6, 5],
|
||||
(37, ([37, 22, '136+140', 35, 18, '134+140', 34, 6, 5],
|
||||
'37/1920x1080/9/0/115',
|
||||
'MP4 HD 1080p (1920x1080)')), # 4.3 - 3.0 Mbps, 192 kbps
|
||||
(22, ([22, 35, 18, 34, 6, 5],
|
||||
(22, ([22, '136+140', 35, 18, '134+140', 34, 6, 5],
|
||||
'22/1280x720/9/0/115',
|
||||
'MP4 HD 720p (1280x720)')), # 2.9 - 2.0 Mbps, 192 kbps
|
||||
(18, ([18, 34, 6, 5],
|
||||
(18, ([18, '134+140', 34, 6, 5],
|
||||
'18/640x360/9/0/115',
|
||||
'MP4 360p (640x360)')), # 0.5 Mbps, 96 kbps
|
||||
|
||||
|
@ -117,7 +117,9 @@ def get_fmt_ids(youtube_config):
|
|||
@registry.download_url.register
|
||||
def youtube_real_download_url(config, episode):
|
||||
fmt_ids = get_fmt_ids(config.youtube) if config else None
|
||||
res = get_real_download_url(episode.url, fmt_ids)
|
||||
res, duration = get_real_download_url(episode.url, fmt_ids)
|
||||
if duration is not None:
|
||||
episode.total_time = int(int(duration) / 1000)
|
||||
return None if res == episode.url else res
|
||||
|
||||
|
||||
|
@ -125,6 +127,8 @@ def get_real_download_url(url, preferred_fmt_ids=None):
|
|||
if not preferred_fmt_ids:
|
||||
preferred_fmt_ids, _, _ = formats_dict[22] # MP4 720p
|
||||
|
||||
duration = None
|
||||
|
||||
vid = get_youtube_id(url)
|
||||
if vid is not None:
|
||||
page = None
|
||||
|
@ -142,42 +146,59 @@ def get_real_download_url(url, preferred_fmt_ids=None):
|
|||
# (http://forum.videohelp.com/topic336882-1800.html#1912972)
|
||||
|
||||
def find_urls(page):
|
||||
# streamingData is preferable to url_encoded_fmt_stream_map
|
||||
# streamingData.formats are the same as url_encoded_fmt_stream_map
|
||||
# streamingData.adaptiveFormats are audio-only and video-only formats
|
||||
x = parse_qs(page)
|
||||
|
||||
if 'reason' in x:
|
||||
error_message = util.remove_html_tags(x['reason'][0])
|
||||
elif 'player_response' in x:
|
||||
player_response = json.loads(x['player_response'][0])
|
||||
|
||||
if 'reason' in player_response['playabilityStatus']:
|
||||
error_message = util.remove_html_tags(player_response['playabilityStatus']['reason'])
|
||||
elif 'live_playback' in x:
|
||||
error_message = 'live stream'
|
||||
elif 'post_live_playback' in x:
|
||||
error_message = 'post live stream'
|
||||
elif 'streamingData' in player_response:
|
||||
# DRM videos store url inside a cipher key - not supported
|
||||
if 'formats' in player_response['streamingData']:
|
||||
for f in player_response['streamingData']['formats']:
|
||||
if 'url' in f:
|
||||
yield int(f['itag']), [f['url'], f.get('approxDurationMs')]
|
||||
if 'adaptiveFormats' in player_response['streamingData']:
|
||||
for f in player_response['streamingData']['adaptiveFormats']:
|
||||
if 'url' in f:
|
||||
yield int(f['itag']), [f['url'], f.get('approxDurationMs')]
|
||||
return
|
||||
|
||||
if error_message:
|
||||
raise YouTubeError('Cannot download video: %s' % error_message)
|
||||
|
||||
r4 = re.search('url_encoded_fmt_stream_map=([^&]+)', page)
|
||||
if r4 is not None:
|
||||
fmt_url_map = urllib.parse.unquote(r4.group(1))
|
||||
for fmt_url_encoded in fmt_url_map.split(','):
|
||||
video_info = parse_qs(fmt_url_encoded)
|
||||
yield int(video_info['itag'][0]), video_info['url'][0]
|
||||
else:
|
||||
error_info = parse_qs(page)
|
||||
if 'reason' in error_info:
|
||||
error_message = util.remove_html_tags(error_info['reason'][0])
|
||||
elif 'player_response' in error_info:
|
||||
player_response = json.loads(error_info['player_response'][0])
|
||||
if 'reason' in player_response['playabilityStatus']:
|
||||
error_message = util.remove_html_tags(player_response['playabilityStatus']['reason'])
|
||||
elif 'live_playback' in error_info:
|
||||
error_message = 'live stream'
|
||||
elif 'post_live_playback' in error_info:
|
||||
error_message = 'post live stream'
|
||||
else:
|
||||
error_message = ''
|
||||
else:
|
||||
error_message = ''
|
||||
raise YouTubeError('Cannot download video: %s' % error_message)
|
||||
yield int(video_info['itag'][0]), [video_info['url'][0], None]
|
||||
|
||||
fmt_id_url_map = sorted(find_urls(page), reverse=True)
|
||||
|
||||
if not fmt_id_url_map:
|
||||
raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)
|
||||
|
||||
# Default to the highest fmt_id if we don't find a match below
|
||||
_, url = fmt_id_url_map[0]
|
||||
drm = re.search('%22cipher%22%3A', page)
|
||||
if drm is not None:
|
||||
raise YouTubeError('Unsupported DRM content found for video ID "%s"' % vid)
|
||||
raise YouTubeError('No formats found for video ID "%s"' % vid)
|
||||
|
||||
formats_available = set(fmt_id for fmt_id, url in fmt_id_url_map)
|
||||
fmt_id_url_map = dict(fmt_id_url_map)
|
||||
|
||||
for id in preferred_fmt_ids:
|
||||
if re.search('\+', str(id)):
|
||||
# skip formats that contain a + (136+140)
|
||||
continue
|
||||
id = int(id)
|
||||
if id in formats_available:
|
||||
format = formats_dict.get(id)
|
||||
|
@ -188,10 +209,12 @@ def get_real_download_url(url, preferred_fmt_ids=None):
|
|||
|
||||
logger.info('Found YouTube format: %s (fmt_id=%d)',
|
||||
description, id)
|
||||
url = fmt_id_url_map[id]
|
||||
url, duration = fmt_id_url_map[id]
|
||||
break
|
||||
else:
|
||||
raise YouTubeError('No preferred formats found for video ID "%s"' % vid)
|
||||
|
||||
return url
|
||||
return url, duration
|
||||
|
||||
|
||||
def get_youtube_id(url):
|
||||
|
|
Loading…
Reference in a new issue