Use raw strings in regexp patterns, fix DeprecationWarnings

This commit is contained in:
Teemu Ikonen 2021-01-14 23:17:10 +02:00
parent de811b2c2b
commit 0e94386bd3
6 changed files with 37 additions and 37 deletions

View File

@ -243,10 +243,10 @@ class ExtensionContainer(object):
encoding = util.guess_encoding(filename)
extension_py = open(filename, "r", encoding=encoding).read()
metadata = dict(re.findall("__([a-z_]+)__ = '([^']+)'", extension_py))
metadata = dict(re.findall(r"__([a-z_]+)__ = '([^']+)'", extension_py))
# Support for using gpodder.gettext() as _ to localize text
localized_metadata = dict(re.findall("__([a-z_]+)__ = _\('([^']+)'\)",
localized_metadata = dict(re.findall(r"__([a-z_]+)__ = _\('([^']+)'\)",
extension_py))
for key in localized_metadata:

View File

@ -65,10 +65,10 @@ class GEpisode(model.PodcastEpisode):
length_str = ''
return ('<b>%s</b>\n<small>%s' + _('released %s') +
'; ' + _('from %s') + '</small>') % (
html.escape(re.sub('\s+', ' ', self.title)),
html.escape(re.sub(r'\s+', ' ', self.title)),
html.escape(length_str),
html.escape(self.pubdate_prop),
html.escape(re.sub('\s+', ' ', self.channel.title)))
html.escape(re.sub(r'\s+', ' ', self.channel.title)))
@property
def markup_delete_episodes(self):

View File

@ -400,7 +400,7 @@ class PodcastEpisode(PodcastModelObject):
# "#001: Title" -> "001: Title"
if (
not self.parent._common_prefix and
re.match('^#\d+: ', self.title) and
re.match(r'^#\d+: ', self.title) and
len(self.title) - 1 > LEFTOVER_MIN):
return self.title[1:]
@ -478,7 +478,7 @@ class PodcastEpisode(PodcastModelObject):
def one_line_description(self):
MAX_LINE_LENGTH = 120
desc = util.remove_html_tags(self.description or '')
desc = re.sub('\s+', ' ', desc).strip()
desc = re.sub(r'\s+', ' ', desc).strip()
if not desc:
return _('No description available')
else:
@ -949,7 +949,7 @@ class PodcastChannel(PodcastModelObject):
@classmethod
def sort_key(cls, podcast):
key = util.convert_bytes(podcast.title.lower())
return re.sub('^the ', '', key).translate(cls.UNICODE_TRANSLATE)
return re.sub(r'^the ', '', key).translate(cls.UNICODE_TRANSLATE)
@classmethod
def load(cls, model, url, create=True, authentication_tokens=None, max_episodes=0):
@ -1007,7 +1007,7 @@ class PodcastChannel(PodcastModelObject):
def _consume_updated_title(self, new_title):
# Replace multi-space and newlines with single space (Maemo bug 11173)
new_title = re.sub('\s+', ' ', new_title).strip()
new_title = re.sub(r'\s+', ' ', new_title).strip()
# Only update the podcast-supplied title when we
# don't yet have a title, or if the title is the

View File

@ -161,7 +161,7 @@ class SoundcloudUser(object):
class SoundcloudFeed(model.Feed):
URL_REGEX = re.compile('https?://([a-z]+\.)?soundcloud\.com/([^/]+)$', re.I)
URL_REGEX = re.compile(r'https?://([a-z]+\.)?soundcloud\.com/([^/]+)$', re.I)
@classmethod
def fetch_channel(cls, channel, max_episodes=0):
@ -218,7 +218,7 @@ class SoundcloudFeed(model.Feed):
class SoundcloudFavFeed(SoundcloudFeed):
URL_REGEX = re.compile('https?://([a-z]+\.)?soundcloud\.com/([^/]+)/favorites', re.I)
URL_REGEX = re.compile(r'https?://([a-z]+\.)?soundcloud\.com/([^/]+)/favorites', re.I)
def __init__(self, username):
super(SoundcloudFavFeed, self).__init__(username)

View File

@ -603,7 +603,7 @@ def is_html(text):
>>> is_html('a < b < c')
False
"""
html_test = re.compile('<[a-z][a-z0-9]*(?:\s.*?>|\/?>)', re.IGNORECASE | re.DOTALL)
html_test = re.compile(r'<[a-z][a-z0-9]*(?:\s.*?>|\/?>)', re.IGNORECASE | re.DOTALL)
return bool(html_test.search(text))
@ -617,18 +617,18 @@ def remove_html_tags(html):
return None
# If we would want more speed, we could make these global
re_strip_tags = re.compile('<[^>]*>')
re_unicode_entities = re.compile('&#(\d{2,4});')
re_html_entities = re.compile('&(.{2,8});')
re_newline_tags = re.compile('(<br[^>]*>|<[/]?ul[^>]*>|</li>)', re.I)
re_listing_tags = re.compile('<li[^>]*>', re.I)
re_strip_tags = re.compile(r'<[^>]*>')
re_unicode_entities = re.compile(r'&#(\d{2,4});')
re_html_entities = re.compile(r'&(.{2,8});')
re_newline_tags = re.compile(r'(<br[^>]*>|<[/]?ul[^>]*>|</li>)', re.I)
re_listing_tags = re.compile(r'<li[^>]*>', re.I)
result = html
# Convert common HTML elements to their text equivalent
result = re_newline_tags.sub('\n', result)
result = re_listing_tags.sub('\n * ', result)
result = re.sub('<[Pp]>', '\n\n', result)
result = re.sub(r'<[Pp]>', '\n\n', result)
# Remove all HTML/XML tags from the string
result = re_strip_tags.sub('', result)
@ -640,7 +640,7 @@ def remove_html_tags(html):
result = re_html_entities.sub(lambda x: entitydefs.get(x.group(1), ''), result)
# Convert more than two newlines to two newlines
result = re.sub('([\r\n]{2})([\r\n])+', '\\1', result)
result = re.sub(r'([\r\n]{2})([\r\n])+', '\\1', result)
return result.strip()
@ -659,7 +659,7 @@ class HyperlinkExtracter(object):
for target, parts in group_it:
t = ''.join(text for _, text in parts if text is not None)
# Remove trailing spaces
t = re.sub(' +\n', '\n', t)
t = re.sub(r' +\n', '\n', t)
# Convert more than two newlines to two newlines
t = t.replace('\r', '')
t = re.sub(r'\n\n\n+', '\n\n', t)
@ -1846,8 +1846,8 @@ def osx_get_active_interfaces():
"""
process = Popen(['ifconfig'], close_fds=True, stdout=subprocess.PIPE)
stdout, _ = process.communicate()
for i in re.split('\n(?!\t)', stdout.decode('utf-8'), re.MULTILINE):
b = re.match('(\\w+):.*status: (active|associated)$', i, re.MULTILINE | re.DOTALL)
for i in re.split(r'\n(?!\t)', stdout.decode('utf-8'), re.MULTILINE):
b = re.match(r'(\w+):.*status: (active|associated)$', i, re.MULTILINE | re.DOTALL)
if b:
yield b.group(1)
@ -1861,8 +1861,8 @@ def unix_get_active_interfaces():
"""
process = Popen(['ifconfig'], close_fds=True, stdout=subprocess.PIPE)
stdout, _ = process.communicate()
for i in re.split('\n(?!\t)', stdout.decode(locale.getpreferredencoding()), re.MULTILINE):
b = re.match('(\\w+):.*status: (active|associated)$', i, re.MULTILINE | re.DOTALL)
for i in re.split(r'\n(?!\t)', stdout.decode(locale.getpreferredencoding()), re.MULTILINE):
b = re.match(r'(\w+):.*status: (active|associated)$', i, re.MULTILINE | re.DOTALL)
if b:
yield b.group(1)

View File

@ -241,7 +241,7 @@ def get_real_download_url(url, allow_partial, preferred_fmt_ids=None):
manifest = manifest.decode().splitlines()
urls = [line for line in manifest if line[0] != '#']
itag_re = re.compile('/itag/([0-9]+)/')
itag_re = re.compile(r'/itag/([0-9]+)/')
for url in urls:
itag = itag_re.search(url).group(1)
yield int(itag), [url, None]
@ -263,7 +263,7 @@ def get_real_download_url(url, allow_partial, preferred_fmt_ids=None):
if error_message is not None:
raise YouTubeError('Cannot download video: %s' % error_message)
r4 = re.search('url_encoded_fmt_stream_map=([^&]+)', page)
r4 = re.search(r'url_encoded_fmt_stream_map=([^&]+)', page)
if r4 is not None:
fmt_url_map = urllib.parse.unquote(r4.group(1))
for fmt_url_encoded in fmt_url_map.split(','):
@ -273,7 +273,7 @@ def get_real_download_url(url, allow_partial, preferred_fmt_ids=None):
fmt_id_url_map = sorted(find_urls(page), reverse=True)
if not fmt_id_url_map:
drm = re.search('%22(cipher|signatureCipher)%22%3A', page)
drm = re.search(r'%22(cipher|signatureCipher)%22%3A', page)
if drm is not None:
raise YouTubeError('Unsupported DRM content found for video ID "%s"' % vid)
raise YouTubeError('No formats found for video ID "%s"' % vid)
@ -282,7 +282,7 @@ def get_real_download_url(url, allow_partial, preferred_fmt_ids=None):
fmt_id_url_map = dict(fmt_id_url_map)
for id in preferred_fmt_ids:
if re.search('\+', str(id)):
if re.search(r'\+', str(id)):
# skip formats that contain a + (136+140)
continue
id = int(id)
@ -304,15 +304,15 @@ def get_real_download_url(url, allow_partial, preferred_fmt_ids=None):
def get_youtube_id(url):
r = re.compile('http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
r = re.compile(r'http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
if r is not None:
return r.group(1)
r = re.compile('http[s]?://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)', re.IGNORECASE).match(url)
r = re.compile(r'http[s]?://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)', re.IGNORECASE).match(url)
if r is not None:
return r.group(1)
r = re.compile('http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)[?]', re.IGNORECASE).match(url)
r = re.compile(r'http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)[?]', re.IGNORECASE).match(url)
if r is not None:
return r.group(1)
@ -335,13 +335,13 @@ def for_each_feed_pattern(func, url, fallback_result):
func() returns None, return fallback_result.
"""
CHANNEL_MATCH_PATTERNS = [
'http[s]?://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)',
'http[s]?://(?:[a-z]+\.)?youtube\.com/profile?user=([a-z0-9]+)',
'http[s]?://(?:[a-z]+\.)?youtube\.com/rss/user/([a-z0-9]+)/videos\.rss',
'http[s]?://(?:[a-z]+\.)?youtube\.com/channel/([-_a-z0-9]+)',
'http[s]?://(?:[a-z]+\.)?youtube\.com/feeds/videos.xml\?channel_id=([-_a-z0-9]+)',
'http[s]?://gdata.youtube.com/feeds/users/([^/]+)/uploads',
'http[s]?://gdata.youtube.com/feeds/base/users/([^/]+)/uploads',
r'http[s]?://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)',
r'http[s]?://(?:[a-z]+\.)?youtube\.com/profile?user=([a-z0-9]+)',
r'http[s]?://(?:[a-z]+\.)?youtube\.com/rss/user/([a-z0-9]+)/videos\.rss',
r'http[s]?://(?:[a-z]+\.)?youtube\.com/channel/([-_a-z0-9]+)',
r'http[s]?://(?:[a-z]+\.)?youtube\.com/feeds/videos.xml\?channel_id=([-_a-z0-9]+)',
r'http[s]?://gdata.youtube.com/feeds/users/([^/]+)/uploads',
r'http[s]?://gdata.youtube.com/feeds/base/users/([^/]+)/uploads',
]
for pattern in CHANNEL_MATCH_PATTERNS: