Merge pull request #765 from gpodder/youtube-dl-improvements

Youtube dl improvements
This commit is contained in:
Eric Le Lay 2020-03-24 08:39:48 +01:00 committed by GitHub
commit 84f5f79310
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 33 additions and 12 deletions

View File

@ -11,7 +11,7 @@ import sys
import time
import youtube_dl
from youtube_dl.utils import DownloadError, sanitize_url
from youtube_dl.utils import DownloadError, ExtractorError, sanitize_url
import gpodder
from gpodder import download, feedcore, model, registry, youtube
@ -187,7 +187,7 @@ class YoutubeFeed(model.Feed):
new_entries = [e for e in entries if e['guid'] not in existing_guids]
logger.debug('%i/%i new entries', len(new_entries), len(all_seen_guids))
self._ie_result['entries'] = new_entries
self._downloader.refresh_entries(self._ie_result, self._max_episodes)
self._downloader.refresh_entries(self._ie_result)
# episodes from entries
episodes = []
for en in self._ie_result['entries']:
@ -290,7 +290,9 @@ class gPodderYoutubeDL(download.CustomDownloader):
def fetch_video(self, url, tempname, reporthook):
opts = {
'outtmpl': tempname, # use given tempname by DownloadTask
# outtmpl: use given tempname by DownloadTask
# (escape % and $ because outtmpl used as a string template by youtube-dl)
'outtmpl': tempname.replace('%', '%%').replace('$', '$$'),
'nopart': True, # don't append .part (already .partial)
'retries': 3, # retry a few times
'progress_hooks': [reporthook] # to notify UI
@ -300,7 +302,7 @@ class gPodderYoutubeDL(download.CustomDownloader):
with youtube_dl.YoutubeDL(opts) as ydl:
return ydl.extract_info(url, download=True)
def refresh_entries(self, ie_result, max_episodes):
def refresh_entries(self, ie_result):
# only interested in video metadata
opts = {
'skip_download': True, # don't download the video
@ -308,11 +310,22 @@ class gPodderYoutubeDL(download.CustomDownloader):
}
self.add_format(self.gpodder_config, opts, fallback='18')
opts.update(self._ydl_opts)
try:
with youtube_dl.YoutubeDL(opts) as ydl:
ydl.process_ie_result(ie_result, download=False)
except DownloadError:
logger.exception('refreshing %r', ie_result)
new_entries = []
# refresh videos one by one to catch single videos blocked by youtube
for e in ie_result.get('entries', []):
tmp = {k: v for k, v in ie_result.items() if k != 'entries'}
tmp['entries'] = [e]
try:
with youtube_dl.YoutubeDL(opts) as ydl:
ydl.process_ie_result(tmp, download=False)
new_entries.extend(tmp.get('entries'))
except DownloadError as ex:
if ex.exc_info[0] == ExtractorError:
# for instance "This video contains content from xyz, who has blocked it on copyright grounds"
logger.warning('Skipping %s: %s', e.get('title', ''), ex.exc_info[1])
continue
logger.exception('Skipping %r: %s', tmp, ex.exc_info)
ie_result['entries'] = new_entries
def refresh(self, url, channel_url, max_episodes):
"""

View File

@ -651,6 +651,7 @@ class HyperlinkExtracter(object):
def __init__(self):
    """Start with no extracted parts, no active link and text enabled."""
    # (link target, text) tuples, appended in arrival order by output()
    self.parts = []
    # Stack of link targets; the initial None keeps target_stack[-1] valid
    # (None presumably means "not inside a link" -- confirm against get_result)
    self.target_stack = [None]
    # While True, handle_data() discards the text it receives
    self.ignore_data = False
def get_result(self):
# Group together multiple consecutive parts with same link target,
@ -698,11 +699,18 @@ class HyperlinkExtracter(object):
if len(self.target_stack) > 1:
self.target_stack.pop()
def handle_start_style(self, attrs):
    """Begin suppressing text: set ``ignore_data`` so handle_data() emits nothing.

    ``attrs`` is accepted but not used.
    NOTE(review): presumably invoked when a <style> start tag is seen; the
    dispatching code is not visible here -- confirm.
    """
    self.ignore_data = True
def handle_end_style(self):
    """Stop suppressing text: clear ``ignore_data`` so handle_data() emits again.

    NOTE(review): presumably invoked when a </style> end tag is seen; the
    dispatching code is not visible here -- confirm.
    """
    self.ignore_data = False
def output(self, text):
    """Record ``text`` as a part, paired with the target on top of the stack."""
    current_target = self.target_stack[-1]
    self.parts.append((current_target, text))
def handle_data(self, data):
    """Emit ``data`` as a text part unless text is currently suppressed.

    The text is passed through ``self.htmlws`` and then handed to
    ``self.output``. Nothing is emitted while ``self.ignore_data`` is true.
    """
    # ignore_data is raised by handle_start_style and lowered by
    # handle_end_style, so text received in between is dropped.
    if self.ignore_data:
        return
    self.output(self.htmlws(data))
def handle_entityref(self, name):
c = chr(name2codepoint[name])

View File

@ -91,8 +91,8 @@ html5lib==1.0.1
webencodings==0.5.1
six==1.12.0
certifi==2019.9.11
mutagen==1.42.0
youtube_dl==2019.9.12.1
mutagen==1.44.0
youtube_dl==2020.3.8
"
function install_deps {