Use raw strings in regexp patterns, fix DeprecationWarnings
This commit is contained in:
parent
de811b2c2b
commit
0e94386bd3
|
@@ -243,10 +243,10 @@ class ExtensionContainer(object):
|
||||||
|
|
||||||
encoding = util.guess_encoding(filename)
|
encoding = util.guess_encoding(filename)
|
||||||
extension_py = open(filename, "r", encoding=encoding).read()
|
extension_py = open(filename, "r", encoding=encoding).read()
|
||||||
metadata = dict(re.findall("__([a-z_]+)__ = '([^']+)'", extension_py))
|
metadata = dict(re.findall(r"__([a-z_]+)__ = '([^']+)'", extension_py))
|
||||||
|
|
||||||
# Support for using gpodder.gettext() as _ to localize text
|
# Support for using gpodder.gettext() as _ to localize text
|
||||||
localized_metadata = dict(re.findall("__([a-z_]+)__ = _\('([^']+)'\)",
|
localized_metadata = dict(re.findall(r"__([a-z_]+)__ = _\('([^']+)'\)",
|
||||||
extension_py))
|
extension_py))
|
||||||
|
|
||||||
for key in localized_metadata:
|
for key in localized_metadata:
|
||||||
|
|
|
@@ -65,10 +65,10 @@ class GEpisode(model.PodcastEpisode):
|
||||||
length_str = ''
|
length_str = ''
|
||||||
return ('<b>%s</b>\n<small>%s' + _('released %s') +
|
return ('<b>%s</b>\n<small>%s' + _('released %s') +
|
||||||
'; ' + _('from %s') + '</small>') % (
|
'; ' + _('from %s') + '</small>') % (
|
||||||
html.escape(re.sub('\s+', ' ', self.title)),
|
html.escape(re.sub(r'\s+', ' ', self.title)),
|
||||||
html.escape(length_str),
|
html.escape(length_str),
|
||||||
html.escape(self.pubdate_prop),
|
html.escape(self.pubdate_prop),
|
||||||
html.escape(re.sub('\s+', ' ', self.channel.title)))
|
html.escape(re.sub(r'\s+', ' ', self.channel.title)))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def markup_delete_episodes(self):
|
def markup_delete_episodes(self):
|
||||||
|
|
|
@@ -400,7 +400,7 @@ class PodcastEpisode(PodcastModelObject):
|
||||||
# "#001: Title" -> "001: Title"
|
# "#001: Title" -> "001: Title"
|
||||||
if (
|
if (
|
||||||
not self.parent._common_prefix and
|
not self.parent._common_prefix and
|
||||||
re.match('^#\d+: ', self.title) and
|
re.match(r'^#\d+: ', self.title) and
|
||||||
len(self.title) - 1 > LEFTOVER_MIN):
|
len(self.title) - 1 > LEFTOVER_MIN):
|
||||||
return self.title[1:]
|
return self.title[1:]
|
||||||
|
|
||||||
|
@@ -478,7 +478,7 @@ class PodcastEpisode(PodcastModelObject):
|
||||||
def one_line_description(self):
|
def one_line_description(self):
|
||||||
MAX_LINE_LENGTH = 120
|
MAX_LINE_LENGTH = 120
|
||||||
desc = util.remove_html_tags(self.description or '')
|
desc = util.remove_html_tags(self.description or '')
|
||||||
desc = re.sub('\s+', ' ', desc).strip()
|
desc = re.sub(r'\s+', ' ', desc).strip()
|
||||||
if not desc:
|
if not desc:
|
||||||
return _('No description available')
|
return _('No description available')
|
||||||
else:
|
else:
|
||||||
|
@@ -949,7 +949,7 @@ class PodcastChannel(PodcastModelObject):
|
||||||
@classmethod
|
@classmethod
|
||||||
def sort_key(cls, podcast):
|
def sort_key(cls, podcast):
|
||||||
key = util.convert_bytes(podcast.title.lower())
|
key = util.convert_bytes(podcast.title.lower())
|
||||||
return re.sub('^the ', '', key).translate(cls.UNICODE_TRANSLATE)
|
return re.sub(r'^the ', '', key).translate(cls.UNICODE_TRANSLATE)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def load(cls, model, url, create=True, authentication_tokens=None, max_episodes=0):
|
def load(cls, model, url, create=True, authentication_tokens=None, max_episodes=0):
|
||||||
|
@@ -1007,7 +1007,7 @@ class PodcastChannel(PodcastModelObject):
|
||||||
|
|
||||||
def _consume_updated_title(self, new_title):
|
def _consume_updated_title(self, new_title):
|
||||||
# Replace multi-space and newlines with single space (Maemo bug 11173)
|
# Replace multi-space and newlines with single space (Maemo bug 11173)
|
||||||
new_title = re.sub('\s+', ' ', new_title).strip()
|
new_title = re.sub(r'\s+', ' ', new_title).strip()
|
||||||
|
|
||||||
# Only update the podcast-supplied title when we
|
# Only update the podcast-supplied title when we
|
||||||
# don't yet have a title, or if the title is the
|
# don't yet have a title, or if the title is the
|
||||||
|
|
|
@@ -161,7 +161,7 @@ class SoundcloudUser(object):
|
||||||
|
|
||||||
|
|
||||||
class SoundcloudFeed(model.Feed):
|
class SoundcloudFeed(model.Feed):
|
||||||
URL_REGEX = re.compile('https?://([a-z]+\.)?soundcloud\.com/([^/]+)$', re.I)
|
URL_REGEX = re.compile(r'https?://([a-z]+\.)?soundcloud\.com/([^/]+)$', re.I)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def fetch_channel(cls, channel, max_episodes=0):
|
def fetch_channel(cls, channel, max_episodes=0):
|
||||||
|
@@ -218,7 +218,7 @@ class SoundcloudFeed(model.Feed):
|
||||||
|
|
||||||
|
|
||||||
class SoundcloudFavFeed(SoundcloudFeed):
|
class SoundcloudFavFeed(SoundcloudFeed):
|
||||||
URL_REGEX = re.compile('https?://([a-z]+\.)?soundcloud\.com/([^/]+)/favorites', re.I)
|
URL_REGEX = re.compile(r'https?://([a-z]+\.)?soundcloud\.com/([^/]+)/favorites', re.I)
|
||||||
|
|
||||||
def __init__(self, username):
|
def __init__(self, username):
|
||||||
super(SoundcloudFavFeed, self).__init__(username)
|
super(SoundcloudFavFeed, self).__init__(username)
|
||||||
|
|
|
@@ -603,7 +603,7 @@ def is_html(text):
|
||||||
>>> is_html('a < b < c')
|
>>> is_html('a < b < c')
|
||||||
False
|
False
|
||||||
"""
|
"""
|
||||||
html_test = re.compile('<[a-z][a-z0-9]*(?:\s.*?>|\/?>)', re.IGNORECASE | re.DOTALL)
|
html_test = re.compile(r'<[a-z][a-z0-9]*(?:\s.*?>|\/?>)', re.IGNORECASE | re.DOTALL)
|
||||||
return bool(html_test.search(text))
|
return bool(html_test.search(text))
|
||||||
|
|
||||||
|
|
||||||
|
@@ -617,18 +617,18 @@ def remove_html_tags(html):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# If we would want more speed, we could make these global
|
# If we would want more speed, we could make these global
|
||||||
re_strip_tags = re.compile('<[^>]*>')
|
re_strip_tags = re.compile(r'<[^>]*>')
|
||||||
re_unicode_entities = re.compile('&#(\d{2,4});')
|
re_unicode_entities = re.compile(r'&#(\d{2,4});')
|
||||||
re_html_entities = re.compile('&(.{2,8});')
|
re_html_entities = re.compile(r'&(.{2,8});')
|
||||||
re_newline_tags = re.compile('(<br[^>]*>|<[/]?ul[^>]*>|</li>)', re.I)
|
re_newline_tags = re.compile(r'(<br[^>]*>|<[/]?ul[^>]*>|</li>)', re.I)
|
||||||
re_listing_tags = re.compile('<li[^>]*>', re.I)
|
re_listing_tags = re.compile(r'<li[^>]*>', re.I)
|
||||||
|
|
||||||
result = html
|
result = html
|
||||||
|
|
||||||
# Convert common HTML elements to their text equivalent
|
# Convert common HTML elements to their text equivalent
|
||||||
result = re_newline_tags.sub('\n', result)
|
result = re_newline_tags.sub('\n', result)
|
||||||
result = re_listing_tags.sub('\n * ', result)
|
result = re_listing_tags.sub('\n * ', result)
|
||||||
result = re.sub('<[Pp]>', '\n\n', result)
|
result = re.sub(r'<[Pp]>', '\n\n', result)
|
||||||
|
|
||||||
# Remove all HTML/XML tags from the string
|
# Remove all HTML/XML tags from the string
|
||||||
result = re_strip_tags.sub('', result)
|
result = re_strip_tags.sub('', result)
|
||||||
|
@@ -640,7 +640,7 @@ def remove_html_tags(html):
|
||||||
result = re_html_entities.sub(lambda x: entitydefs.get(x.group(1), ''), result)
|
result = re_html_entities.sub(lambda x: entitydefs.get(x.group(1), ''), result)
|
||||||
|
|
||||||
# Convert more than two newlines to two newlines
|
# Convert more than two newlines to two newlines
|
||||||
result = re.sub('([\r\n]{2})([\r\n])+', '\\1', result)
|
result = re.sub(r'([\r\n]{2})([\r\n])+', '\\1', result)
|
||||||
|
|
||||||
return result.strip()
|
return result.strip()
|
||||||
|
|
||||||
|
@@ -659,7 +659,7 @@ class HyperlinkExtracter(object):
|
||||||
for target, parts in group_it:
|
for target, parts in group_it:
|
||||||
t = ''.join(text for _, text in parts if text is not None)
|
t = ''.join(text for _, text in parts if text is not None)
|
||||||
# Remove trailing spaces
|
# Remove trailing spaces
|
||||||
t = re.sub(' +\n', '\n', t)
|
t = re.sub(r' +\n', '\n', t)
|
||||||
# Convert more than two newlines to two newlines
|
# Convert more than two newlines to two newlines
|
||||||
t = t.replace('\r', '')
|
t = t.replace('\r', '')
|
||||||
t = re.sub(r'\n\n\n+', '\n\n', t)
|
t = re.sub(r'\n\n\n+', '\n\n', t)
|
||||||
|
@@ -1846,8 +1846,8 @@ def osx_get_active_interfaces():
|
||||||
"""
|
"""
|
||||||
process = Popen(['ifconfig'], close_fds=True, stdout=subprocess.PIPE)
|
process = Popen(['ifconfig'], close_fds=True, stdout=subprocess.PIPE)
|
||||||
stdout, _ = process.communicate()
|
stdout, _ = process.communicate()
|
||||||
for i in re.split('\n(?!\t)', stdout.decode('utf-8'), re.MULTILINE):
|
for i in re.split(r'\n(?!\t)', stdout.decode('utf-8'), re.MULTILINE):
|
||||||
b = re.match('(\\w+):.*status: (active|associated)$', i, re.MULTILINE | re.DOTALL)
|
b = re.match(r'(\w+):.*status: (active|associated)$', i, re.MULTILINE | re.DOTALL)
|
||||||
if b:
|
if b:
|
||||||
yield b.group(1)
|
yield b.group(1)
|
||||||
|
|
||||||
|
@@ -1861,8 +1861,8 @@ def unix_get_active_interfaces():
|
||||||
"""
|
"""
|
||||||
process = Popen(['ifconfig'], close_fds=True, stdout=subprocess.PIPE)
|
process = Popen(['ifconfig'], close_fds=True, stdout=subprocess.PIPE)
|
||||||
stdout, _ = process.communicate()
|
stdout, _ = process.communicate()
|
||||||
for i in re.split('\n(?!\t)', stdout.decode(locale.getpreferredencoding()), re.MULTILINE):
|
for i in re.split(r'\n(?!\t)', stdout.decode(locale.getpreferredencoding()), re.MULTILINE):
|
||||||
b = re.match('(\\w+):.*status: (active|associated)$', i, re.MULTILINE | re.DOTALL)
|
b = re.match(r'(\w+):.*status: (active|associated)$', i, re.MULTILINE | re.DOTALL)
|
||||||
if b:
|
if b:
|
||||||
yield b.group(1)
|
yield b.group(1)
|
||||||
|
|
||||||
|
|
|
@@ -241,7 +241,7 @@ def get_real_download_url(url, allow_partial, preferred_fmt_ids=None):
|
||||||
manifest = manifest.decode().splitlines()
|
manifest = manifest.decode().splitlines()
|
||||||
|
|
||||||
urls = [line for line in manifest if line[0] != '#']
|
urls = [line for line in manifest if line[0] != '#']
|
||||||
itag_re = re.compile('/itag/([0-9]+)/')
|
itag_re = re.compile(r'/itag/([0-9]+)/')
|
||||||
for url in urls:
|
for url in urls:
|
||||||
itag = itag_re.search(url).group(1)
|
itag = itag_re.search(url).group(1)
|
||||||
yield int(itag), [url, None]
|
yield int(itag), [url, None]
|
||||||
|
@@ -263,7 +263,7 @@ def get_real_download_url(url, allow_partial, preferred_fmt_ids=None):
|
||||||
if error_message is not None:
|
if error_message is not None:
|
||||||
raise YouTubeError('Cannot download video: %s' % error_message)
|
raise YouTubeError('Cannot download video: %s' % error_message)
|
||||||
|
|
||||||
r4 = re.search('url_encoded_fmt_stream_map=([^&]+)', page)
|
r4 = re.search(r'url_encoded_fmt_stream_map=([^&]+)', page)
|
||||||
if r4 is not None:
|
if r4 is not None:
|
||||||
fmt_url_map = urllib.parse.unquote(r4.group(1))
|
fmt_url_map = urllib.parse.unquote(r4.group(1))
|
||||||
for fmt_url_encoded in fmt_url_map.split(','):
|
for fmt_url_encoded in fmt_url_map.split(','):
|
||||||
|
@@ -273,7 +273,7 @@ def get_real_download_url(url, allow_partial, preferred_fmt_ids=None):
|
||||||
fmt_id_url_map = sorted(find_urls(page), reverse=True)
|
fmt_id_url_map = sorted(find_urls(page), reverse=True)
|
||||||
|
|
||||||
if not fmt_id_url_map:
|
if not fmt_id_url_map:
|
||||||
drm = re.search('%22(cipher|signatureCipher)%22%3A', page)
|
drm = re.search(r'%22(cipher|signatureCipher)%22%3A', page)
|
||||||
if drm is not None:
|
if drm is not None:
|
||||||
raise YouTubeError('Unsupported DRM content found for video ID "%s"' % vid)
|
raise YouTubeError('Unsupported DRM content found for video ID "%s"' % vid)
|
||||||
raise YouTubeError('No formats found for video ID "%s"' % vid)
|
raise YouTubeError('No formats found for video ID "%s"' % vid)
|
||||||
|
@@ -282,7 +282,7 @@ def get_real_download_url(url, allow_partial, preferred_fmt_ids=None):
|
||||||
fmt_id_url_map = dict(fmt_id_url_map)
|
fmt_id_url_map = dict(fmt_id_url_map)
|
||||||
|
|
||||||
for id in preferred_fmt_ids:
|
for id in preferred_fmt_ids:
|
||||||
if re.search('\+', str(id)):
|
if re.search(r'\+', str(id)):
|
||||||
# skip formats that contain a + (136+140)
|
# skip formats that contain a + (136+140)
|
||||||
continue
|
continue
|
||||||
id = int(id)
|
id = int(id)
|
||||||
|
@@ -304,15 +304,15 @@ def get_real_download_url(url, allow_partial, preferred_fmt_ids=None):
|
||||||
|
|
||||||
|
|
||||||
def get_youtube_id(url):
|
def get_youtube_id(url):
|
||||||
r = re.compile('http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
|
r = re.compile(r'http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
|
||||||
if r is not None:
|
if r is not None:
|
||||||
return r.group(1)
|
return r.group(1)
|
||||||
|
|
||||||
r = re.compile('http[s]?://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)', re.IGNORECASE).match(url)
|
r = re.compile(r'http[s]?://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)', re.IGNORECASE).match(url)
|
||||||
if r is not None:
|
if r is not None:
|
||||||
return r.group(1)
|
return r.group(1)
|
||||||
|
|
||||||
r = re.compile('http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)[?]', re.IGNORECASE).match(url)
|
r = re.compile(r'http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)[?]', re.IGNORECASE).match(url)
|
||||||
if r is not None:
|
if r is not None:
|
||||||
return r.group(1)
|
return r.group(1)
|
||||||
|
|
||||||
|
@@ -335,13 +335,13 @@ def for_each_feed_pattern(func, url, fallback_result):
|
||||||
func() returns None, return fallback_result.
|
func() returns None, return fallback_result.
|
||||||
"""
|
"""
|
||||||
CHANNEL_MATCH_PATTERNS = [
|
CHANNEL_MATCH_PATTERNS = [
|
||||||
'http[s]?://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)',
|
r'http[s]?://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)',
|
||||||
'http[s]?://(?:[a-z]+\.)?youtube\.com/profile?user=([a-z0-9]+)',
|
r'http[s]?://(?:[a-z]+\.)?youtube\.com/profile?user=([a-z0-9]+)',
|
||||||
'http[s]?://(?:[a-z]+\.)?youtube\.com/rss/user/([a-z0-9]+)/videos\.rss',
|
r'http[s]?://(?:[a-z]+\.)?youtube\.com/rss/user/([a-z0-9]+)/videos\.rss',
|
||||||
'http[s]?://(?:[a-z]+\.)?youtube\.com/channel/([-_a-z0-9]+)',
|
r'http[s]?://(?:[a-z]+\.)?youtube\.com/channel/([-_a-z0-9]+)',
|
||||||
'http[s]?://(?:[a-z]+\.)?youtube\.com/feeds/videos.xml\?channel_id=([-_a-z0-9]+)',
|
r'http[s]?://(?:[a-z]+\.)?youtube\.com/feeds/videos.xml\?channel_id=([-_a-z0-9]+)',
|
||||||
'http[s]?://gdata.youtube.com/feeds/users/([^/]+)/uploads',
|
r'http[s]?://gdata.youtube.com/feeds/users/([^/]+)/uploads',
|
||||||
'http[s]?://gdata.youtube.com/feeds/base/users/([^/]+)/uploads',
|
r'http[s]?://gdata.youtube.com/feeds/base/users/([^/]+)/uploads',
|
||||||
]
|
]
|
||||||
|
|
||||||
for pattern in CHANNEL_MATCH_PATTERNS:
|
for pattern in CHANNEL_MATCH_PATTERNS:
|
||||||
|
|
Loading…
Reference in New Issue