Merge pull request #843 from gpodder/requests

Use the requests package for all http requests
Eric Le Lay 2020-11-24 17:27:12 +01:00 committed by GitHub
commit aa58c70a1e
30 changed files with 452 additions and 641 deletions

View File

@ -1,12 +1,12 @@
language: python
dist: trusty
dist: focal
sudo: required
python:
- "3.5"
- "3.8"
install:
- sudo apt-get update -q
- sudo apt-get install intltool desktop-file-utils
- pip3 install coverage==4.5.4 minimock pycodestyle isort
- "pip3 install pytest-cov minimock pycodestyle 'isort<5.0' requests pytest pytest-httpserver"
- python3 tools/localdepends.py
script:
- make lint

View File

@ -29,6 +29,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
- [Python 3.5](http://python.org/) or newer
- [Podcastparser](http://gpodder.org/podcastparser/) 0.6.0 or newer
- [mygpoclient](http://gpodder.org/mygpoclient/) 1.7 or newer
- [requests](https://requests.readthedocs.io) 2.24.0 or newer
- Python D-Bus bindings
As an alternative to python-dbus on Mac OS X and Windows, you can use
@ -66,7 +67,9 @@ PyPI. With this, you get a self-contained gPodder CLI codebase.
### Test Dependencies
- python-minimock
- python-coverage
- pytest
- pytest-httpserver
- pytest-cov
- desktop-file-utils
## Testing
@ -85,9 +88,8 @@ Tests in gPodder are written in two different ways:
- [unittests](http://docs.python.org/3/library/unittest.html)
If you want to add doctests, simply write the doctest and make sure that
the module appears in "doctest_modules" in src/gpodder/unittests.py. For
example, the doctests in src/gpodder/util.py are added as 'util' (the
"gpodder" prefix must not be specified there).
the module appears after `--doctest-modules` in `pytest.ini`. If you
add doctests to any module in `src/gpodder`, nothing more needs to be done.
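As a hedged sketch of what that means in practice (the function name is hypothetical), a doctest written in any collected module is picked up without a registration step:

def sanitize_filename(name):
    """
    >>> sanitize_filename('a/b')
    'a_b'
    """
    return name.replace('/', '_')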
If you want to add unit tests for a specific module (ex: gpodder.model),
you should add the tests as gpodder.test.model, or in other words:

View File

@ -96,13 +96,13 @@ except ImportError:
readline = None
try:
import termios
import fcntl
import struct
import termios
except ImportError:
termios = None
fcntl = None
struct = None
termios = None
# A poor man's argparse/getopt - but it works for our use case :)
verbose = False

View File

@ -61,8 +61,10 @@ help:
##########################################################################
unittest:
LC_ALL=C PYTHONPATH=src/ $(PYTHON) -m gpodder.unittests
LC_ALL=C PYTHONPATH=src/ pytest --ignore=tests --ignore=src/gpodder/utilwin32ctypes.py --doctest-modules src/gpodder/util.py src/gpodder/jsonconfig.py
LC_ALL=C PYTHONPATH=src/ pytest tests --ignore=src/gpodder/utilwin32ctypes.py --ignore=src/mygpoclient --cov=gpodder
# ISORTOPTS := -c share src/gpodder tools bin/* *.py # for isort >= 5.0
ISORTOPTS := -rc -c share src/gpodder tools bin/* *.py
lint:
pycodestyle share src/gpodder tools bin/* *.py

View File

@ -1,2 +0,0 @@
[pytest]
addopts = --doctest-modules src/gpodder

View File

@ -5,5 +5,5 @@ select = W1, W2, W3, E11, E121, E122, E123, E124, E125, E127, E129, E13, E2, E3,
max-line-length = 142
[isort]
known_third_party=dbus,gi,mutagen,cairo,requests,github3,jinja2,magic,youtube_dl,podcastparser,mygpoclient
known_third_party=cairo,comtypes,dbus,gi,github3,jinja2,kaa,magic,mutagen,mygpoclient,podcastparser,requests,youtube_dl
known_first_party=gpodder,soco

View File

@ -15,8 +15,9 @@ import os
import shlex
import subprocess
import gpodder
import kaa.metadata
import gpodder
from gpodder import util
logger = logging.getLogger(__name__)

View File

@ -26,9 +26,10 @@ from ctypes import (HRESULT, POINTER, Structure, alignment, c_int, c_uint,
c_ulong, c_ulonglong, c_ushort, c_wchar_p, sizeof)
from ctypes.wintypes import tagRECT
import gpodder
from comtypes import COMMETHOD, GUID, IUnknown, client, wireHWND
import gpodder
_ = gpodder.gettext

View File

@ -143,7 +143,7 @@ del locale_dir
SOCKET_TIMEOUT = 60
socket.setdefaulttimeout(SOCKET_TIMEOUT)
del socket
del SOCKET_TIMEOUT
SOCKET_TIMEOUT
# Variables reserved for GUI-specific use (will be set accordingly)
ui_folders = []
@ -185,7 +185,8 @@ def fixup_home(old_home):
old_home = new_home # force to config directory
print("D: windows-portable build; forcing home to config directory %s" % new_home, file=sys.stderr)
else: # ui.win32, not portable build
from gpodder.utilwin32ctypes import get_documents_folder, get_reg_current_user_string_value
from gpodder.utilwin32ctypes import (
get_documents_folder, get_reg_current_user_string_value)
try:
# from old launcher, see
# https://github.com/gpodder/gpodder/blob/old/gtk2/tools/win32-launcher/folderselector.c

View File

@ -85,7 +85,7 @@ class CoverDownloader(object):
try:
logger.info('Downloading cover art: %s', cover_url)
data = util.urlopen(cover_url, timeout=self.TIMEOUT).read()
data = util.urlopen(cover_url, timeout=self.TIMEOUT).content
except Exception as e:
logger.warn('Cover art download failed: %s', e)
return self._fallback_filename(title)

View File

@ -89,7 +89,7 @@ def directory_entry_from_opml(url):
def directory_entry_from_mygpo_json(url):
return [DirectoryEntry(d['title'], d['url'], d['logo_url'], d['subscribers'], d['description'])
for d in json.load(util.urlopen(url))]
for d in util.urlopen(url).json()]
class GPodderNetSearchProvider(Provider):
@ -152,7 +152,7 @@ class GPodderNetTagsProvider(Provider):
return directory_entry_from_mygpo_json('http://gpodder.net/api/2/tag/%s/50.json' % urllib.parse.quote(tag))
def get_tags(self):
return [DirectoryTag(d['tag'], d['usage']) for d in json.load(util.urlopen('http://gpodder.net/api/2/tags/40.json'))]
return [DirectoryTag(d['tag'], d['usage']) for d in util.urlopen('http://gpodder.net/api/2/tags/40.json').json()]
class SoundcloudSearchProvider(Provider):
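The pattern above — replacing json.load(util.urlopen(url)) with util.urlopen(url).json() — leans on the requests Response API. A minimal sketch of the equivalence (URL and timeout illustrative):

import requests

resp = requests.get('http://gpodder.net/api/2/tags/40.json', timeout=60)
tags = resp.json()  # decodes the JSON body directly
# old urllib-era pattern this replaces:
#   tags = json.load(urllib.request.urlopen(json_url))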

View File

@ -36,9 +36,12 @@ import socket
import threading
import time
import urllib.error
import urllib.parse
import urllib.request
from email.header import decode_header
import requests
from requests.adapters import HTTPAdapter
from requests.exceptions import ConnectionError, HTTPError, RequestException
from requests.packages.urllib3.exceptions import MaxRetryError
from requests.packages.urllib3.util.retry import Retry
import gpodder
from gpodder import registry, util
@ -47,6 +50,8 @@ logger = logging.getLogger(__name__)
_ = gpodder.gettext
REDIRECT_RETRIES = 3
class CustomDownload:
""" abstract class for custom downloads. DownloadTask call retrieve_resume() on it """
@ -78,30 +83,6 @@ class CustomDownloader:
return None
def get_header_param(headers, param, header_name):
"""Extract a HTTP header parameter from a dict
Uses the "email" module to retrieve parameters
from HTTP headers. This can be used to get the
"filename" parameter of the "content-disposition"
header for downloads to pick a good filename.
Returns None if the filename cannot be retrieved.
"""
value = None
try:
headers_string = ['%s:%s' % (k, v) for k, v in list(headers.items())]
msg = email.message_from_string('\n'.join(headers_string))
if header_name in msg:
raw_value = msg.get_param(param, header=header_name)
if raw_value is not None:
value = email.utils.collapse_rfc2231_value(raw_value)
except Exception as e:
logger.error('Cannot get %s from %s', param, header_name, exc_info=True)
return value
class ContentRange(object):
# Based on:
# http://svn.pythonpaste.org/Paste/WebOb/trunk/webob/byterange.py
@ -207,9 +188,6 @@ class ContentRange(object):
class DownloadCancelledException(Exception): pass
class AuthenticationError(Exception): pass
class gPodderDownloadHTTPError(Exception):
def __init__(self, url, error_code, error_message):
self.url = url
@ -217,74 +195,38 @@ class gPodderDownloadHTTPError(Exception):
self.error_message = error_message
class DownloadURLOpener(urllib.request.FancyURLopener):
version = gpodder.user_agent
class DownloadURLOpener:
# Sometimes URLs are not escaped correctly - try to fix them
# (see RFC2396; Section 2.4.3. Excluded US-ASCII Characters)
# FYI: The omission of "%" in the list is to avoid double escaping!
ESCAPE_CHARS = dict((ord(c), '%%%x' % ord(c)) for c in ' <>#"{}|\\^[]`')
def __init__(self, channel):
self.channel = channel
self._auth_retry_counter = 0
def __init__(self, channel, max_retries=3):
super().__init__()
self.channel = channel
self.max_retries = max_retries
def http_error(self, url, fp, errcode, errmsg, headers, data=None):
"""Handle http errors.
Overridden to give retry=True to http_error_40{1,7}.
See https://github.com/python/cpython/commit/80f1b059714aeb1c6fc9f6ce1173bc8a51af7dd9
See python issue https://bugs.python.org/issue1368368
"""
result = False
if errcode == 401:
result = self.http_error_401(url, fp, errcode, errmsg, headers, data=data, retry=True)
elif errcode == 407:
result = self.http_error_407(url, fp, errcode, errmsg, headers, data=data, retry=True)
if result:
return result
return super().http_error(url, fp, errcode, errmsg, headers, data=data)
def http_error_default(self, url, fp, errcode, errmsg, headers):
"""
FancyURLopener by default does not raise an exception when
there is some unknown HTTP error code. We want to override
this and provide a function to log the error and raise an
exception, so we don't download the HTTP error page here.
"""
# The following two lines are copied from urllib.URLopener's
# implementation of http_error_default
void = fp.read()
fp.close()
raise gPodderDownloadHTTPError(url, errcode, errmsg)
def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
""" This is the exact same function that's included with urllib
except with "void = fp.read()" commented out. """
if 'location' in headers:
newurl = headers['location']
elif 'uri' in headers:
newurl = headers['uri']
else:
return
# This blocks forever(?) with certain servers (see bug #465)
# void = fp.read()
fp.close()
# In case the server sent a relative URL, join with original:
newurl = urllib.parse.urljoin(self.type + ":" + url, newurl)
return self.open(newurl)
def init_session(self):
""" init a session with our own retry codes + retry count """
# Allow a few extra retries for redirects, so the effective ceiling is max_retries + REDIRECT_RETRIES
# (for instance max_retries connect errors plus REDIRECT_RETRIES redirects)
retry_strategy = Retry(
total=self.max_retries + REDIRECT_RETRIES,
connect=self.max_retries,
read=self.max_retries,
redirect=max(REDIRECT_RETRIES, self.max_retries),
status=self.max_retries,
status_forcelist=Retry.RETRY_AFTER_STATUS_CODES.union((408, 418, 504, 598, 599,)))
adapter = HTTPAdapter(max_retries=retry_strategy)
http = requests.Session()
http.mount("https://", adapter)
http.mount("http://", adapter)
return http
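init_session() above mounts one HTTPAdapter per scheme with a urllib3 Retry policy; total caps the combined attempts while the per-category limits bound each failure mode. A standalone sketch of the same wiring (limits illustrative, not gPodder's exact values):

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

retry = Retry(
    total=6,        # hard ceiling across all categories
    connect=3,      # connection failures
    read=3,         # read timeouts / dropped connections
    redirect=3,     # 30x hops
    status=3,       # retryable status codes
    status_forcelist=Retry.RETRY_AFTER_STATUS_CODES.union((408, 418, 504, 598, 599)),
)
session = requests.Session()
session.mount('http://', HTTPAdapter(max_retries=retry))
session.mount('https://', HTTPAdapter(max_retries=retry))
# a 503 (in RETRY_AFTER_STATUS_CODES) is now retried transparently:
# response = session.get('http://example.com/episode.mp3', stream=True)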
# The following is based on Python's urllib.py "URLopener.retrieve"
# Also based on http://mail.python.org/pipermail/python-list/2001-October/110069.html
def http_error_206(self, url, fp, errcode, errmsg, headers, data=None):
# The next line is taken from urllib's URLopener.open_http
# method, at the end after the line "if errcode == 200:"
return urllib.addinfourl(fp, headers, 'http:' + url)
def retrieve_resume(self, url, filename, reporthook=None, data=None):
"""Download files from an URL; return (headers, real_url)
@ -294,13 +236,23 @@ class DownloadURLOpener(urllib.request.FancyURLopener):
current_size = 0
tfp = None
headers = {
'User-agent': gpodder.user_agent
}
if self.channel.auth_username or self.channel.auth_password:
logger.debug('Authenticating as "%s"', self.channel.auth_username)
auth = (self.channel.auth_username, self.channel.auth_password)
else:
auth = None
if os.path.exists(filename):
try:
current_size = os.path.getsize(filename)
tfp = open(filename, 'ab')
# If the file exists, then only download the remainder
if current_size > 0:
self.addheader('Range', 'bytes=%s-' % (current_size))
headers['Range'] = 'bytes=%s-' % (current_size)
except:
logger.warn('Cannot resume download: %s', filename, exc_info=True)
tfp = None
@ -312,47 +264,49 @@ class DownloadURLOpener(urllib.request.FancyURLopener):
# Fix a problem with bad URLs that are not encoded correctly (bug 549)
url = url.translate(self.ESCAPE_CHARS)
fp = self.open(url, data)
headers = fp.info()
session = self.init_session()
with session.get(url,
headers=headers,
stream=True,
auth=auth,
timeout=gpodder.SOCKET_TIMEOUT) as resp:
try:
resp.raise_for_status()
except HTTPError as e:
raise gPodderDownloadHTTPError(url, resp.status_code, str(e))
if current_size > 0:
# We told the server to resume - see if she agrees
# See RFC2616 (206 Partial Content + Section 14.16)
# XXX check status code here, too...
range = ContentRange.parse(headers.get('content-range', ''))
if range is None or range.start != current_size:
# Ok, that did not work. Reset the download
# TODO: seek and truncate if content-range differs from request
tfp.close()
tfp = open(filename, 'wb')
current_size = 0
logger.warn('Cannot resume: Invalid Content-Range (RFC2616).')
headers = resp.headers
result = headers, fp.geturl()
bs = 1024 * 8
size = -1
read = current_size
blocknum = current_size // bs
if reporthook:
if "content-length" in headers:
size = int(headers['Content-Length']) + current_size
reporthook(blocknum, bs, size)
while read < size or size == -1:
if size == -1:
block = fp.read(bs)
else:
block = fp.read(min(size - read, bs))
if len(block) == 0:
break
read += len(block)
tfp.write(block)
blocknum += 1
if current_size > 0:
# We told the server to resume - see if she agrees
# See RFC2616 (206 Partial Content + Section 14.16)
# XXX check status code here, too...
range = ContentRange.parse(headers.get('content-range', ''))
if range is None or range.start != current_size:
# Ok, that did not work. Reset the download
# TODO: seek and truncate if content-range differs from request
tfp.close()
tfp = open(filename, 'wb')
current_size = 0
logger.warn('Cannot resume: Invalid Content-Range (RFC2616).')
result = headers, resp.url
bs = 1024 * 8
size = -1
read = current_size
blocknum = current_size // bs
if reporthook:
if "content-length" in headers:
size = int(headers['content-length']) + current_size
reporthook(blocknum, bs, size)
fp.close()
tfp.close()
del fp
del tfp
for block in resp.iter_content(bs):
read += len(block)
tfp.write(block)
blocknum += 1
if reporthook:
reporthook(blocknum, bs, size)
tfp.close()
del tfp
# raise exception if actual size does not match content-length header
if size >= 0 and read < size:
@ -363,19 +317,6 @@ class DownloadURLOpener(urllib.request.FancyURLopener):
# end code based on urllib.py
def prompt_user_passwd(self, host, realm):
# Keep track of authentication attempts, fail after the third one
self._auth_retry_counter += 1
if self._auth_retry_counter > 3:
raise AuthenticationError(_('Wrong username/password'))
if self.channel.auth_username or self.channel.auth_password:
logger.debug('Authenticating as "%s" to "%s" for realm "%s".',
self.channel.auth_username, host, realm)
return (self.channel.auth_username, self.channel.auth_password)
return (None, None)
class DefaultDownload(CustomDownload):
def __init__(self, config, episode, url):
@ -386,13 +327,10 @@ class DefaultDownload(CustomDownload):
def retrieve_resume(self, tempname, reporthook):
url = self._url
logger.info("Downloading %s", url)
downloader = DownloadURLOpener(self.__episode.channel)
# HTTP Status codes for which we retry the download
retry_codes = (408, 418, 504, 598, 599)
max_retries = max(0, self._config.auto.retries)
downloader = DownloadURLOpener(self.__episode.channel, max_retries=max_retries)
# Retry the download on timeout (bug 1013)
# Retry the download on incomplete download (other retries are done by the Retry strategy)
for retry in range(max_retries + 1):
if retry > 0:
logger.info('Retrying download of %s (%d)', url, retry)
@ -409,17 +347,6 @@ class DefaultDownload(CustomDownload):
url)
continue
raise
except socket.timeout as tmout:
if retry < max_retries:
logger.info('Socket timeout: %s - will retry.', url)
continue
raise
except gPodderDownloadHTTPError as http:
if retry < max_retries and http.error_code in retry_codes:
logger.info('HTTP error %d: %s - will retry.',
http.error_code, url)
continue
raise
return (headers, real_url)
@ -893,7 +820,7 @@ class DownloadTask(object):
real_url, os.path.basename(self.filename))
# Look at the Content-disposition header; use it if available
disposition_filename = get_header_param(headers, 'filename', 'content-disposition')
disposition_filename = util.get_header_param(headers, 'filename', 'content-disposition')
# Some servers do send the content-disposition header, but provide
# an empty filename, resulting in an empty string here (bug 1440)
@ -929,6 +856,21 @@ class DownloadTask(object):
except urllib.error.ContentTooShortError as ctse:
self.status = DownloadTask.FAILED
self.error_message = _('Missing content from server')
except ConnectionError as ce:
# special case request exception
self.status = DownloadTask.FAILED
logger.error('Download failed: %s', str(ce), exc_info=True)
d = {'host': ce.args[0].pool.host, 'port': ce.args[0].pool.port}
self.error_message = _("Couldn't connect to server %(host)s:%(port)s" % d)
except RequestException as re:
# extract MaxRetryError to shorten the exception message
if isinstance(re.args[0], MaxRetryError):
re = re.args[0]
logger.error('%s while downloading "%s"', str(re),
self.__episode.title, exc_info=True)
self.status = DownloadTask.FAILED
d = {'error': str(re)}
self.error_message = _('Request Error: %(error)s') % d
except IOError as ioe:
logger.error('%s while downloading "%s": %s', ioe.strerror,
self.__episode.title, ioe.filename, exc_info=True)
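Taken together, retrieve_resume() now delegates transient failures to the Retry policy and keeps only the resume logic: request a byte range, then fall back to a full download if the server ignores it. A compressed sketch of that flow (simplified to a 206 status check; the real code parses Content-Range):

import os
import requests

def fetch_with_resume(session, url, filename, timeout=60):
    headers = {'User-agent': 'gPodder'}  # illustrative UA string
    current_size = os.path.getsize(filename) if os.path.exists(filename) else 0
    if current_size > 0:
        headers['Range'] = 'bytes=%s-' % current_size
    with session.get(url, headers=headers, stream=True, timeout=timeout) as resp:
        resp.raise_for_status()
        if current_size > 0 and resp.status_code != 206:
            current_size = 0  # server ignored the Range header; restart
        with open(filename, 'ab' if current_size else 'wb') as tfp:
            for block in resp.iter_content(8 * 1024):
                tfp.write(block)
        return resp.headers, resp.url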

View File

@ -1,185 +0,0 @@
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2018 The gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#
# gpodder.escapist - Escapist Videos download magic
# somini <somini29@yandex.com>; 2014-09-14
#
import json
import logging
import re
import urllib.error
import urllib.parse
import urllib.request
import gpodder
from gpodder import registry, util
logger = logging.getLogger(__name__)
# This matches the more reliable URL
ESCAPIST_NUMBER_RE = re.compile(r'http://www.escapistmagazine.com/videos/view/(\d+)', re.IGNORECASE)
# This matches regular URL, mainly those that come in the RSS feeds
ESCAPIST_REGULAR_RE = re.compile(r'http://www.escapistmagazine.com/videos/view/([\w-]+)/(\d+)-', re.IGNORECASE)
# This finds the RSS for a given URL
DATA_RSS_RE = re.compile(r'http://www.escapistmagazine.com/rss/videos/list/([1-9][0-9]*)\.xml')
# This matches the "configuration". The important part is the JSON between the parens
DATA_CONFIG_RE = re.compile(r'imsVideo\.play\((.*)\)\;\<\/script\>', re.IGNORECASE)
# This matches the cover art for an RSS. We shouldn't parse XML with regex.
DATA_COVERART_RE = re.compile(r'<url>(http:.+\.jpg)</url>')
class EscapistError(BaseException): pass
@registry.download_url.register
def escapist_real_download_url(unused_config, episode, allow_partial):
res = get_real_download_url(episode.url)
return None if res == episode.url else res
def get_real_download_url(url):
video_id = get_escapist_id(url)
if video_id is None:
return url
web_data = get_escapist_web(video_id)
data_config_frag = DATA_CONFIG_RE.search(web_data)
data_config_url = get_escapist_config_url(data_config_frag.group(1))
if data_config_url is None:
raise EscapistError('Cannot parse configuration from the site')
logger.debug('Config URL: %s', data_config_url)
data_config_data = util.urlopen(data_config_url).read().decode('utf-8')
# TODO: This second argument should get a real name
real_url = get_escapist_real_url(data_config_data, data_config_frag.group(1))
if real_url is None:
raise EscapistError('Cannot get MP4 URL from The Escapist')
elif "sales-marketing/" in real_url:
raise EscapistError('Oops, seems The Escapist blocked this IP. Wait a few days/weeks to get it unblocked')
else:
return real_url
def get_escapist_id(url):
result = ESCAPIST_NUMBER_RE.match(url)
if result is not None:
return result.group(1)
result = ESCAPIST_REGULAR_RE.match(url)
if result is not None:
return result.group(2)
return None
def is_video_link(url):
return (get_escapist_id(url) is not None)
def get_real_channel_url(url):
video_id = get_escapist_id(url)
if video_id is None:
return url
web_data = get_escapist_web(video_id)
data_config_frag = DATA_RSS_RE.search(web_data)
if data_config_frag is None:
raise EscapistError('Cannot get RSS URL from The Escapist')
return data_config_frag.group(0)
def get_real_cover(url):
rss_url = get_real_channel_url(url)
if rss_url is None:
return None
# FIXME: can I be sure to decode it as utf-8?
rss_data = util.urlopen(rss_url).read()
rss_data_frag = DATA_COVERART_RE.search(rss_data)
if rss_data_frag is None:
return None
return rss_data_frag.group(1)
def get_escapist_web(video_id):
if video_id is None:
return None
# FIXME: must check if it's utf-8
web_url = 'http://www.escapistmagazine.com/videos/view/%s' % video_id
return util.urlopen(web_url).read()
def get_escapist_config_url(data):
if data is None:
return None
query_string = urllib.parse.urlencode(json.loads(data))
return 'http://www.escapistmagazine.com/videos/vidconfig.php?%s' % query_string
def get_escapist_real_url(data, config_json):
if data is None:
return None
config_data = json.loads(config_json)
if config_data is None:
return None
# The data is scrambled, unscramble
# Direct port from 'imsVideos.prototype.processRequest' from the file 'ims_videos.min.js'
one_hash = config_data["hash"]
# Turn the string into numbers
hash_n = [ord(x) for x in one_hash]
# Split the data into 2char strings
hex_hashes = [data[x:(x + 2)] for x in range(0, len(data), 2)]
# Turn the strings into numbers, considering the hex value
num_hashes = [int(h, 16) for h in hex_hashes]
# Characters again, from the value
# str_hashes = [ unichr(n) for n in num_hashes ]
# Bitwise XOR num_hashes and the hash
result_num = []
for idx in range(0, len(num_hashes)):
result_num.append(num_hashes[idx] ^ hash_n[idx % len(hash_n)])
# At last, Numbers back into characters
result = ''.join([chr(x) for x in result_num])
# A wild JSON appears...
# You use "Master Ball"...
escapist_cfg = json.loads(result)
# It's super effective!
# TODO: There's a way to choose different video types, for now just pick MP4@480p
return escapist_cfg["files"]["videos"][2]["src"]

View File

@ -25,25 +25,15 @@
import logging
import urllib.parse
from html.parser import HTMLParser
from urllib.error import HTTPError
from io import BytesIO
import podcastparser
from requests.exceptions import RequestException
from gpodder import util, youtube
logger = logging.getLogger(__name__)
try:
# Python 2
from rfc822 import mktime_tz
from StringIO import StringIO
except ImportError:
# Python 3
from email.utils import mktime_tz
from io import StringIO
class ExceptionWithData(Exception):
"""Base exception with additional payload"""
def __init__(self, data):
@ -138,81 +128,76 @@ class Fetcher(object):
"""
return None
def _normalize_status(self, status):
# Based on Mark Pilgrim's "Atom aggregator behaviour" article
if status in (200, 301, 302, 304, 400, 401, 403, 404, 410, 500):
return status
elif status >= 200 and status < 300:
return 200
elif status >= 300 and status < 400:
return 302
elif status >= 400 and status < 500:
return 400
elif status >= 500 and status < 600:
return 500
else:
return status
def _check_statuscode(self, response, feed):
status = self._normalize_status(response.getcode())
if status == 200:
return Result(UPDATED_FEED, feed)
elif status == 301:
return Result(NEW_LOCATION, feed)
elif status == 302:
return Result(UPDATED_FEED, feed)
@staticmethod
def _check_statuscode(status, url):
if status >= 200 and status < 300:
return UPDATED_FEED
elif status == 304:
return Result(NOT_MODIFIED, feed)
return NOT_MODIFIED
# redirects are handled by requests directly
# => the status should never be 301, 302, 303, 307, 308
if status == 400:
raise BadRequest('bad request')
elif status == 401:
raise AuthenticationRequired('authentication required', feed)
if status == 401:
raise AuthenticationRequired('authentication required', url)
elif status == 403:
raise Unsubscribe('forbidden')
elif status == 404:
raise NotFound('not found')
elif status == 410:
raise Unsubscribe('resource is gone')
elif status == 500:
elif status >= 400 and status < 500:
raise BadRequest('bad request')
elif status >= 500 and status < 600:
raise InternalServerError('internal server error')
else:
raise UnknownStatusCode(status)
def _parse_feed(self, url, etag, modified, autodiscovery=True, max_episodes=0):
def parse_feed(self, url, data_stream, headers, status, **kwargs):
"""
kwargs are passed from Fetcher.fetch
:param str url: real url
:param data_stream: file-like object to read from (bytes mode)
:param dict-like headers: response headers (may be empty)
:param int status: always UPDATED_FEED for now
:return Result: Result(status, model.Feed from parsed data_stream)
"""
raise NotImplementedError("Implement parse_feed()")
def fetch(self, url, etag=None, modified=None, autodiscovery=True, **kwargs):
""" use kwargs to pass extra data to parse_feed in Fetcher subclasses """
# handle local file first
if url.startswith('file://'):
url = url[len('file://'):]
stream = open(url)
return self.parse_feed(url, stream, {}, UPDATED_FEED, **kwargs)
# remote feed
headers = {}
if modified is not None:
headers['If-Modified-Since'] = modified
if etag is not None:
headers['If-None-Match'] = etag
if url.startswith('file://'):
is_local = True
url = url[len('file://'):]
stream = open(url)
else:
is_local = False
try:
stream = util.urlopen(url, headers)
except HTTPError as e:
return self._check_statuscode(e, e.geturl())
stream = util.urlopen(url, headers)
data = stream
if autodiscovery and not is_local and stream.headers.get('content-type', '').startswith('text/html'):
# Not very robust attempt to detect encoding: http://stackoverflow.com/a/1495675/1072626
charset = stream.headers.get_param('charset')
if charset is None:
charset = 'utf-8' # utf-8 appears hard-coded elsewhere in this codebase
responses = stream.history + [stream]
for i, resp in enumerate(responses):
if resp.is_permanent_redirect:
# there should always be a next response when a redirect is encountered
# If max redirects is reached, TooManyRedirects is raised
# TODO: since we've got the end contents anyway, modify model.py to accept contents on NEW_LOCATION
return Result(NEW_LOCATION, responses[i + 1].url)
res = self._check_statuscode(stream.status_code, stream.url)
if res == NOT_MODIFIED:
return Result(NOT_MODIFIED, stream.url)
# We use StringIO in case the stream needs to be read again
data = StringIO(stream.read().decode(charset))
if autodiscovery and stream.headers.get('content-type', '').startswith('text/html'):
ad = FeedAutodiscovery(url)
ad.feed(data.getvalue())
# response_text() will assume utf-8 if no charset specified
ad.feed(util.response_text(stream))
if ad._resolved_url and ad._resolved_url != url:
try:
self._parse_feed(ad._resolved_url, None, None, False)
self.fetch(ad._resolved_url, etag=None, modified=None, autodiscovery=False, **kwargs)
return Result(NEW_LOCATION, ad._resolved_url)
except Exception as e:
logger.warn('Feed autodiscovery failed', exc_info=True)
@ -222,21 +207,7 @@ class Fetcher(object):
if new_url and new_url != url:
return Result(NEW_LOCATION, new_url)
# Reset the stream so podcastparser can give it a go
data.seek(0)
try:
feed = podcastparser.parse(url, data)
feed['url'] = url
except ValueError as e:
raise InvalidFeed('Could not parse feed: {msg}'.format(msg=e))
if is_local:
feed['headers'] = {}
return Result(UPDATED_FEED, feed)
else:
feed['headers'] = stream.headers
return self._check_statuscode(stream, feed)
def fetch(self, url, etag=None, modified=None, max_episodes=0):
return self._parse_feed(url, etag, modified, max_episodes)
# XML documents specify the encoding inline, so it is better to pass the encoded body.
# This matters because requests will assume ISO-8859-1 for content-type 'text/xml'
# if the server doesn't specify a charset.
return self.parse_feed(url, BytesIO(stream.content), stream.headers, UPDATED_FEED, **kwargs)
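Because requests follows redirects internally, fetch() can no longer observe a 301 as a status code; instead it walks response.history to spot a permanent move. A minimal sketch of that detection (URL illustrative):

import requests

resp = requests.get('http://example.com/feed', allow_redirects=True, timeout=60)
responses = resp.history + [resp]
for i, r in enumerate(responses):
    if r.is_permanent_redirect:  # 301 or 308
        # requests already followed it, so a next response always exists
        new_url = responses[i + 1].url
        break  # the caller reports Result(NEW_LOCATION, new_url)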

View File

@ -57,6 +57,7 @@ class DownloadStatusModel(Gtk.ListStore):
def _format_message(self, episode, message, podcast):
episode = html.escape(episode)
podcast = html.escape(podcast)
message = html.escape(message)
return '%s\n<small>%s - %s</small>' % (episode, message, podcast)
def request_update(self, iter, task=None):

View File

@ -70,6 +70,10 @@ class BuilderWidget(GtkBuilderWidget):
dlg.set_markup('<span weight="bold" size="larger">%s</span>\n\n%s' % (title, message))
else:
dlg.set_markup('<span weight="bold" size="larger">%s</span>' % (message))
# make message copy/pastable
for lbl in dlg.get_message_area():
if isinstance(lbl, Gtk.Label):
lbl.set_selectable(True)
dlg.run()
dlg.destroy()
else:
@ -130,6 +134,9 @@ class BuilderWidget(GtkBuilderWidget):
def show_login_dialog(self, title, message, root_url=None, username=None, password=None,
username_prompt=None, register_callback=None, register_text=None, ask_server=False):
def toggle_password_visibility(_, entry):
entry.set_visibility(not entry.get_visibility())
if username_prompt is None:
username_prompt = _('Username')
@ -181,8 +188,13 @@ class BuilderWidget(GtkBuilderWidget):
password_label = Gtk.Label()
password_label.set_markup('<b>' + _('Password') + ':</b>')
show_password_label = Gtk.Label()
show_password = Gtk.CheckButton.new_with_label(_('Show Password'))
show_password.connect('toggled', toggle_password_visibility, password_entry)
label_entries = [(username_label, username_entry),
(password_label, password_entry)]
(password_label, password_entry),
(show_password_label, show_password)]
if ask_server:
label_entries.insert(0, (server_label, server_entry))

View File

@ -43,9 +43,7 @@ except ImportError:
# Also note that it only works when gPodder is not running !
# For some reason I don't get the events afterwards...
try:
from AppKit import NSObject
from AppKit import NSAppleEventManager
from AppKit import NSAppleEventDescriptor
from AppKit import NSAppleEventDescriptor, NSAppleEventManager, NSObject
class gPodderEventHandler(NSObject):
""" handles Apple Events for :

View File

@ -38,8 +38,7 @@ import time
import podcastparser
import gpodder
from gpodder import (coverart, escapist_videos, feedcore, registry, schema,
util, vimeo, youtube)
from gpodder import coverart, feedcore, registry, schema, util, vimeo, youtube
logger = logging.getLogger(__name__)
@ -183,37 +182,40 @@ class PodcastParserFeed(Feed):
url = self.feed['paged_feed_next']
logger.debug("get_next_page: feed has next %s", url)
url = channel.authenticate_url(url)
res = self.fetcher.fetch(url, max_episodes=max_episodes)
if res.status == feedcore.UPDATED_FEED:
res.feed = PodcastParserFeed(res.feed, self.fetcher, max_episodes)
return res
return self.fetcher.fetch(url, autodiscovery=False, max_episodes=max_episodes)
return None
class gPodderFetcher(feedcore.Fetcher):
"""
This class extends the feedcore Fetcher with the gPodder User-Agent and the
Proxy handler based on the current settings in gPodder.
This class implements fetching a channel either from custom feed handlers
or, by default, using podcastparser.
"""
def fetch_channel(self, channel, max_episodes):
custom_feed = registry.feed_handler.resolve(channel, None, max_episodes)
if custom_feed is not None:
return custom_feed
# TODO: revisit authenticate_url: pass auth as kwarg
# If we have a username or password, rebuild the url with them included
# Note: using a HTTPBasicAuthHandler would be pain because we need to
# know the realm. It can be done, but I think this method works, too
url = channel.authenticate_url(channel.url)
res = self.fetch(url, channel.http_etag, channel.http_last_modified, max_episodes)
if res.status == feedcore.UPDATED_FEED:
res.feed = PodcastParserFeed(res.feed, self, max_episodes)
return res
return self.fetch(url, channel.http_etag, channel.http_last_modified, max_episodes=max_episodes)
def _resolve_url(self, url):
url = youtube.get_real_channel_url(url)
url = vimeo.get_real_channel_url(url)
url = escapist_videos.get_real_channel_url(url)
return url
def parse_feed(self, url, data_stream, headers, status, max_episodes=0, **kwargs):
try:
feed = podcastparser.parse(url, data_stream)
feed['url'] = url
feed['headers'] = headers
return feedcore.Result(status, PodcastParserFeed(feed, self, max_episodes))
except ValueError as e:
raise feedcore.InvalidFeed('Could not parse feed: {msg}'.format(msg=e))
# Our podcast model:
#
@ -314,7 +316,7 @@ class PodcastEpisode(PodcastModelObject):
if not episode.url:
return None
if any(mod.is_video_link(episode.url) for mod in (youtube, vimeo, escapist_videos)):
if any(mod.is_video_link(episode.url) for mod in (youtube, vimeo)):
return episode
# Check if we can resolve this link to a audio/video file
@ -587,7 +589,6 @@ class PodcastEpisode(PodcastModelObject):
# Use title for YouTube, Vimeo and Soundcloud downloads
if (youtube.is_video_link(self.url) or
vimeo.is_video_link(self.url) or
escapist_videos.is_video_link(self.url) or
episode_filename == 'stream'):
episode_filename = self.title
@ -677,7 +678,7 @@ class PodcastEpisode(PodcastModelObject):
def file_type(self):
# Assume all YouTube/Vimeo links are video files
if youtube.is_video_link(self.url) or vimeo.is_video_link(self.url) or escapist_videos.is_video_link(self.url):
if youtube.is_video_link(self.url) or vimeo.is_video_link(self.url):
return 'video'
return util.file_type_by_extension(self.extension())
@ -1150,6 +1151,7 @@ class PodcastChannel(PodcastModelObject):
if result.status == feedcore.UPDATED_FEED:
self._consume_updated_feed(result.feed, max_episodes)
elif result.status == feedcore.NEW_LOCATION:
# FIXME: could return the feed because in autodiscovery it is parsed already
url = result.feed
logger.info('New feed location: %s => %s', self.url, url)
if url in set(x.url for x in self.model.get_podcasts()):
@ -1213,7 +1215,7 @@ class PodcastChannel(PodcastModelObject):
return self.section
def _get_content_type(self):
if 'youtube.com' in self.url or 'vimeo.com' in self.url or 'escapistmagazine.com' in self.url:
if 'youtube.com' in self.url or 'vimeo.com' in self.url:
return _('Video')
audio, video, other = 0, 0, 0

View File

@ -34,6 +34,7 @@ objects to valid OPML 1.1 files that can be used to backup
or distribute gPodder's channel subscriptions.
"""
import io
import logging
import os
import os.path
@ -69,8 +70,7 @@ class Importer(object):
if os.path.exists(url):
doc = xml.dom.minidom.parse(url)
else:
# FIXME: is it ok to pass bytes to parseString?
doc = xml.dom.minidom.parseString(util.urlopen(url).read())
doc = xml.dom.minidom.parse(io.BytesIO(util.urlopen(url).content))
for outline in doc.getElementsByTagName('outline'):
# Make sure we are dealing with a valid link type (ignore case)

View File

@ -54,28 +54,6 @@ def soundcloud_parsedate(s):
return time.mktime(tuple([int(x) for x in m.groups()] + [0, 0, -1]))
def get_param(s, param='filename', header='content-disposition'):
"""Get a parameter from a string of headers
By default, this gets the "filename" parameter of
the content-disposition header. This works fine
for downloads from Soundcloud.
"""
msg = email.message_from_string(s)
if header in msg:
value = msg.get_param(param, header=header)
decoded_list = email.header.decode_header(value)
value = []
for part, encoding in decoded_list:
if encoding:
value.append(part.decode(encoding))
else:
value.append(str(part))
return ''.join(value)
return None
def get_metadata(url):
"""Get file download metadata
@ -83,12 +61,12 @@ def get_metadata(url):
URL. Will use the network connection to determine the
metadata via the HTTP header fields.
"""
track_fp = util.urlopen(url)
headers = track_fp.info()
filesize = headers['content-length'] or '0'
filetype = headers['content-type'] or 'application/octet-stream'
headers_s = '\n'.join('%s:%s' % (k, v) for k, v in list(headers.items()))
filename = get_param(headers_s) or os.path.basename(os.path.dirname(url))
track_response = util.urlopen(url)
filesize = track_response.headers['content-length'] or '0'
filetype = track_response.headers['content-type'] or 'application/octet-stream'
headers_s = '\n'.join('%s:%s' % (k, v) for k, v in list(track_response.headers.items()))
filename = util.get_header_param(track_response.headers, 'filename', 'content-disposition') \
or os.path.basename(os.path.dirname(url))
track_response.close()
return filesize, filetype, filename
@ -116,7 +94,7 @@ class SoundcloudUser(object):
try:
json_url = 'https://api.soundcloud.com/users/%s.json?consumer_key=%s' % (self.username, CONSUMER_KEY)
user_info = json.loads(util.urlopen(json_url).read().decode('utf-8'))
user_info = util.urlopen(json_url).json()
self.cache[key] = user_info
finally:
self.commit_cache()
@ -146,7 +124,7 @@ class SoundcloudUser(object):
"consumer_key": CONSUMER_KEY})
logger.debug("loading %s", json_url)
json_tracks = json.loads(util.urlopen(json_url).read().decode('utf-8'))
json_tracks = util.urlopen(json_url).json()
tracks = [track for track in json_tracks if track['streamable'] or track['downloadable']]
total_count = len(json_tracks)
@ -265,4 +243,4 @@ registry.feed_handler.register(SoundcloudFavFeed.fetch_channel)
def search_for_user(query):
json_url = 'https://api.soundcloud.com/users.json?q=%s&consumer_key=%s' % (urllib.parse.quote(query), CONSUMER_KEY)
return json.loads(util.urlopen(json_url).read().decode('utf-8'))
return util.urlopen(json_url).json()

View File

@ -1,106 +0,0 @@
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2018 The gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Run Doctests and Unittests for gPodder modules
# 2009-02-25 Thomas Perl <thp@gpodder.org>
import doctest
import sys
import unittest
try:
# Unused here locally, but we import it to be able to give an early
# warning about this missing dependency in order to avoid bogus errors.
import minimock
except ImportError as e:
print("""
Error: Unit tests require the "minimock" module (python-minimock).
Please install it before running the unit tests.
""", file=sys.stderr)
sys.exit(2)
# Main package and test package (for modules in main package)
package = 'gpodder'
test_package = '.'.join((package, 'test'))
suite = unittest.TestSuite()
coverage_modules = []
# Modules (in gpodder) for which doctests exist
# ex: Doctests embedded in "gpodder.util", coverage reported for "gpodder.util"
doctest_modules = ['util', 'jsonconfig']
for module in doctest_modules:
doctest_mod = __import__('.'.join((package, module)), fromlist=[module])
suite.addTest(doctest.DocTestSuite(doctest_mod))
coverage_modules.append(doctest_mod)
# Modules (in gpodder) for which unit tests (in gpodder.test) exist
# ex: Tests are in "gpodder.test.model", coverage reported for "gpodder.model"
test_modules = ['model']
for module in test_modules:
test_mod = __import__('.'.join((test_package, module)), fromlist=[module])
coverage_mod = __import__('.'.join((package, module)), fromlist=[module])
suite.addTest(unittest.defaultTestLoader.loadTestsFromModule(test_mod))
coverage_modules.append(coverage_mod)
try:
# If you want a HTML-based test report, install HTMLTestRunner from:
# http://tungwaiyip.info/software/HTMLTestRunner.html
import HTMLTestRunner
REPORT_FILENAME = 'test_report.html'
runner = HTMLTestRunner.HTMLTestRunner(stream=open(REPORT_FILENAME, 'w'))
print("""
HTML Test Report will be written to %s
""" % REPORT_FILENAME)
except ImportError:
runner = unittest.TextTestRunner(verbosity=2)
try:
import coverage
except ImportError:
coverage = None
if __name__ == '__main__':
if coverage is not None:
cov = coverage.Coverage()
cov.erase()
cov.start()
result = runner.run(suite)
if not result.wasSuccessful():
sys.exit(1)
if coverage is not None:
cov.stop()
cov.report(coverage_modules)
cov.erase()
else:
print("""
No coverage reporting done (Python module "coverage" is missing)
Please install the python-coverage package to get coverage reporting.
""", file=sys.stderr)

View File

@ -31,6 +31,7 @@ are not tied to any specific part of gPodder.
"""
import collections
import datetime
import email
import glob
import gzip
import http.client
@ -55,23 +56,20 @@ import threading
import time
import urllib.error
import urllib.parse
import urllib.request
import webbrowser
import xml.dom.minidom
from html.entities import entitydefs
from html.entities import entitydefs, name2codepoint
from html.parser import HTMLParser
import requests
import requests.exceptions
from requests.packages.urllib3.util.retry import Retry
import gpodder
logger = logging.getLogger(__name__)
if sys.hexversion < 0x03000000:
from html.parser import HTMLParser
from html.entities import name2codepoint
else:
from html.parser import HTMLParser
from html.entities import name2codepoint
try:
import html5lib
except ImportError:
@ -1187,31 +1185,27 @@ def url_add_authentication(url, username, password):
return urllib.parse.urlunsplit(url_parts)
def urlopen(url, headers=None, data=None, timeout=None):
def urlopen(url, headers=None, data=None, timeout=None, **kwargs):
"""
An URL opener with the User-agent set to gPodder (with version)
"""
username, password = username_password_from_url(url)
if username is not None or password is not None:
url = url_strip_authentication(url)
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
password_mgr.add_password(None, url, username, password)
handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
opener = urllib.request.build_opener(handler)
else:
opener = urllib.request.build_opener()
if headers is None:
headers = {}
else:
headers = dict(headers)
if not timeout:
timeout = gpodder.SOCKET_TIMEOUT
retry_strategy = Retry(
total=3,
status_forcelist=Retry.RETRY_AFTER_STATUS_CODES.union((408, 418, 504, 598, 599,)))
s = requests.Session()
a = requests.adapters.HTTPAdapter(max_retries=retry_strategy)
s.mount('http://', a)
s.mount('https://', a)
headers.update({'User-agent': gpodder.user_agent})
request = urllib.request.Request(url, data=data, headers=headers)
if timeout is None:
return opener.open(request)
else:
return opener.open(request, timeout=timeout)
return s.get(url, headers=headers, data=data, timeout=timeout, **kwargs)
def get_real_url(url):
@ -1219,7 +1213,7 @@ def get_real_url(url):
Gets the real URL of a file and resolves all redirects.
"""
try:
return urlopen(url).geturl()
return urlopen(url).url
except:
logger.error('Getting real url for %s', url, exc_info=True)
return url
@ -1805,8 +1799,7 @@ def get_update_info():
(False, '3.0.5', '2012-02-29', 10)
"""
url = 'https://api.github.com/repos/gpodder/gpodder/releases/latest'
data = urlopen(url).read().decode('utf-8')
info = json.loads(data)
info = urlopen(url).json()
latest_version = info.get('tag_name', '').replace('gpodder-', '')
release_date = info['published_at']
@ -1922,9 +1915,9 @@ def website_reachable(url):
return (False, None)
try:
response = urllib.request.urlopen(url, timeout=1)
response = requests.get(url, timeout=1)
return (True, response)
except urllib.error.URLError as err:
except requests.exceptions.RequestException:
pass
return (False, None)
@ -2182,3 +2175,42 @@ def parse_mimetype(mimetype):
except MIMETypeException as e:
print(e)
return (None, None, {})
def get_header_param(headers, param, header_name):
"""Extract a HTTP header parameter from a dict
Uses the "email" module to retrieve parameters
from HTTP headers. This can be used to get the
"filename" parameter of the "content-disposition"
header for downloads to pick a good filename.
Returns None if the filename cannot be retrieved.
"""
value = None
try:
headers_string = ['%s:%s' % (k, v) for k, v in list(headers.items())]
msg = email.message_from_string('\n'.join(headers_string))
if header_name in msg:
raw_value = msg.get_param(param, header=header_name)
if raw_value is not None:
value = email.utils.collapse_rfc2231_value(raw_value)
except Exception as e:
logger.error('Cannot get %s from %s', param, header_name, exc_info=True)
return value
def response_text(response, default_encoding='utf-8'):
"""
Utility method to return urlopen response's text.
Requests uses only the charset info in content-type, then defaults to ISO-8859-1
when content-type=text/*.
We could use chardet (via response.apparent_encoding) but it's slow so often it's
simpler to just use the known encoding.
:return: textual body of the response
"""
if 'charset=' in response.headers.get('content-type', ''):
return response.text
else:
return response.content.decode(default_encoding)
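For callers migrating off urllib, the requests Response maps onto the old file-like API roughly as follows — a hedged cheat sheet, with the feed URL illustrative:

from gpodder import util

resp = util.urlopen('https://example.com/feed.xml')
resp.status_code                 # was resp.getcode()
resp.url                         # was resp.geturl()
resp.headers                     # was resp.info(); case-insensitive dict
body = resp.content              # was resp.read(); raw bytes
data = resp.json()               # was json.load(resp)
text = util.response_text(resp)  # honours charset, else utf-8 fallback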

View File

@ -65,8 +65,7 @@ def get_real_download_url(url, preferred_fileformat=None):
data_config_url = 'https://player.vimeo.com/video/%s/config' % (video_id,)
def get_urls(data_config_url):
data_config_data = util.urlopen(data_config_url).read().decode('utf-8')
data_config = json.loads(data_config_data)
data_config = util.urlopen(data_config_url).json()
for fileinfo in list(data_config['request']['files'].values()):
if not isinstance(fileinfo, list):
continue

View File

@ -20,6 +20,7 @@
# Justin Forest <justin.forest@gmail.com> 2008-10-13
#
import io
import json
import logging
import re
@ -366,8 +367,8 @@ def get_channel_id_url(url):
if 'youtube.com' in url:
try:
channel_url = ''
raw_xml_data = util.urlopen(url).read().decode('utf-8')
xml_data = xml.etree.ElementTree.fromstring(raw_xml_data)
raw_xml_data = io.BytesIO(util.urlopen(url).content)
xml_data = xml.etree.ElementTree.parse(raw_xml_data)
channel_id = xml_data.find("{http://www.youtube.com/xml/schemas/2015}channelId").text
channel_url = 'https://www.youtube.com/channel/{}'.format(channel_id)
return channel_url
@ -402,7 +403,7 @@ def get_cover(url):
try:
channel_url = get_channel_id_url(url)
html_data = util.urlopen(channel_url).read().decode('utf-8')
html_data = util.response_text(util.urlopen(channel_url))
parser = YouTubeHTMLCoverParser()
parser.feed(html_data)
if parser.url:
@ -433,7 +434,7 @@ def get_channel_desc(url):
try:
channel_url = get_channel_id_url(url)
html_data = util.urlopen(channel_url).read().decode('utf-8')
html_data = util.response_text(util.urlopen(channel_url))
parser = YouTubeHTMLDesc()
parser.feed(html_data)
if parser.description:

tests/test_feedcore.py (new file, 115 lines)
View File

@ -0,0 +1,115 @@
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2023 The gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import io
import pytest
import requests.exceptions
from gpodder.feedcore import Fetcher, Result, NEW_LOCATION, NOT_MODIFIED, UPDATED_FEED
class MyFetcher(Fetcher):
def parse_feed(self, url, data_stream, headers, status, **kwargs):
return Result(status, {
'parse_feed': {
'url': url,
'data_stream': data_stream,
'headers': headers,
'extra_args': dict(**kwargs),
},
})
SIMPLE_RSS = """
<rss>
<channel>
<title>Feed Name</title>
<item>
<title>Some Episode Title</title>
<guid>urn:test/ep1</guid>
<pubDate>Sun, 25 Nov 2018 17:28:03 +0000</pubDate>
<enclosure
url="/ep1.ogg"
type="audio/ogg"
length="100000"/>
</item>
</channel>
</rss>
"""
def test_easy(httpserver):
res_data = SIMPLE_RSS
httpserver.expect_request('/feed').respond_with_data(SIMPLE_RSS, content_type='text/xml')
res = MyFetcher().fetch(httpserver.url_for('/feed'), custom_key='value')
assert res.status == UPDATED_FEED
args = res.feed['parse_feed']
assert args['headers']['content-type'] == 'text/xml'
assert isinstance(args['data_stream'], io.BytesIO)
assert args['data_stream'].getvalue().decode('utf-8') == SIMPLE_RSS
assert args['url'] == httpserver.url_for('/feed')
assert args['extra_args']['custom_key'] == 'value'
def test_redirect(httpserver):
res_data = SIMPLE_RSS
httpserver.expect_request('/endfeed').respond_with_data(SIMPLE_RSS, content_type='text/xml')
redir_headers = {
'Location': '/endfeed',
}
# temporary redirect
httpserver.expect_request('/feed').respond_with_data(status=302, headers=redir_headers)
httpserver.expect_request('/permanentfeed').respond_with_data(status=301, headers=redir_headers)
res = MyFetcher().fetch(httpserver.url_for('/feed'))
assert res.status == UPDATED_FEED
args = res.feed['parse_feed']
assert args['headers']['content-type'] == 'text/xml'
assert isinstance(args['data_stream'], io.BytesIO)
assert args['data_stream'].getvalue().decode('utf-8') == SIMPLE_RSS
assert args['url'] == httpserver.url_for('/feed')
res = MyFetcher().fetch(httpserver.url_for('/permanentfeed'))
assert res.status == NEW_LOCATION
assert res.feed == httpserver.url_for('/endfeed')
def test_redirect_loop(httpserver):
""" verify that feedcore fetching will not loop indefinitely on redirects """
redir_headers = {
'Location': '/feed', # it loops
}
httpserver.expect_request('/feed').respond_with_data(status=302, headers=redir_headers)
with pytest.raises(requests.exceptions.TooManyRedirects):
res = MyFetcher().fetch(httpserver.url_for('/feed'))
assert res.status == UPDATED_FEED
args = res.feed['parse_feed']
assert args['headers']['content-type'] == 'text/xml'
assert isinstance(args['data_stream'], io.BytesIO)
assert args['data_stream'].getvalue().decode('utf-8') == SIMPLE_RSS
assert args['url'] == httpserver.url_for('/feed')
def test_temporary_error_retry(httpserver):
httpserver.expect_ordered_request('/feed').respond_with_data(status=503)
res_data = SIMPLE_RSS
httpserver.expect_ordered_request('/feed').respond_with_data(SIMPLE_RSS, content_type='text/xml')
res = MyFetcher().fetch(httpserver.url_for('/feed'))
assert res.status == UPDATED_FEED
args = res.feed['parse_feed']
assert args['headers']['content-type'] == 'text/xml'
assert args['url'] == httpserver.url_for('/feed')

View File

@ -16,12 +16,11 @@ from jinja2 import Template
def debug_requests():
""" turn requests debug on """
import logging
# These two lines enable debugging at httplib level (requests->urllib3->http.client)
# You will see the REQUEST, including HEADERS and DATA, and RESPONSE with HEADERS but without DATA.
# The only thing missing will be the response.body which is not logged.
import http.client as http_client
import logging
http_client.HTTPConnection.debuglevel = 1
# You must initialize logging, otherwise you'll not see debug output.

View File

@ -11,6 +11,7 @@ import http.server
import re
import sys
import threading
import time
USERNAME = 'user@example.com' # Username used for HTTP Authentication
PASSWORD = 'secret' # Password used for HTTP Authentication
@ -24,8 +25,10 @@ URL = 'http://%(HOST)s:%(PORT)s' % locals()
FEEDNAME = sys.argv[0] # The title of the RSS feed
REDIRECT = 'redirect.rss' # The path for a redirection
REDIRECT_TO_BAD_HOST = 'redirect_bad' # The path for a redirection
FEEDFILE = 'feed.rss' # The "filename" of the feed on the server
EPISODES = 'episode' # Base name for the episode files
TIMEOUT = 'timeout' # The path to never return
EPISODES_EXT = '.mp3' # Extension for the episode files
EPISODES_MIME = 'audio/mpeg' # Mime type for the episode files
EP_COUNT = 7 # Number of episodes in the feed
@ -64,6 +67,36 @@ def mkrss(items=EP_COUNT):
type="%(EPISODES_MIME)s"
length="%(SIZE)s"/>
</item>""" % dict(list(locals().items()) + list(globals().items()))
ITEMS += """
<item>
<title>Server Timeout Episode</title>
<guid>tag:test.gpodder.org,2012:timeout</guid>
<pubDate>Sun, 25 Nov 2018 17:28:03 +0000</pubDate>
<enclosure
url="%(URL)s/%(TIMEOUT)s"
type="%(EPISODES_MIME)s"
length="%(SIZE)s"/>
</item>""" % dict(list(locals().items()) + list(globals().items()))
ITEMS += """
<item>
<title>Bad Host Episode</title>
<guid>tag:test.gpodder.org,2012:timeout</guid>
<pubDate>Sun, 25 Nov 2018 17:28:03 +0000</pubDate>
<enclosure
url="%(URL)s/%(REDIRECT_TO_BAD_HOST)s"
type="%(EPISODES_MIME)s"
length="%(SIZE)s"/>
</item>""" % dict(list(locals().items()) + list(globals().items()))
ITEMS += """
<item>
<title>Space in url Episode</title>
<guid>tag:test.gpodder.org,2012:timeout</guid>
<pubDate>Sun, 25 Nov 2018 17:28:03 +0000</pubDate>
<enclosure
url="%(URL)s/%(EPISODES)s with space%(EPISODES_EXT)s"
type="%(EPISODES_MIME)s"
length="%(SIZE)s"/>
</item>""" % dict(list(locals().items()) + list(globals().items()))
return """
<rss>
@ -76,13 +109,15 @@ def mkrss(items=EP_COUNT):
def mkdata(size=SIZE):
"""Generate dummy data of a given size (in bytes)"""
return b''.join(chr(32 + (i % (127 - 32))) for i in range(size))
return bytes([32 + (i % (127 - 32)) for i in range(size)])
class AuthRequestHandler(http.server.BaseHTTPRequestHandler):
FEEDFILE_PATH = '/%s' % FEEDFILE
EPISODES_PATH = '/%s' % EPISODES
REDIRECT_PATH = '/%s' % REDIRECT
REDIRECT_TO_BAD_HOST_PATH = '/%s' % REDIRECT_TO_BAD_HOST
TIMEOUT_PATH = '/%s' % TIMEOUT
def do_GET(self):
authorized = False
@ -113,6 +148,16 @@ class AuthRequestHandler(http.server.BaseHTTPRequestHandler):
self.send_header('Location', '%s/%s' % (URL, FEEDFILE))
self.end_headers()
return
elif self.path.startswith(self.REDIRECT_TO_BAD_HOST_PATH):
print('Redirect request => bad host.')
self.send_response(302)
self.send_header('Location', '//notthere.gpodder.io/%s' % (FEEDFILE))
self.end_headers()
return
elif self.path == self.TIMEOUT_PATH:
# will need to restart the server or wait 80s before next request
time.sleep(80)
return
if not authorized:
print('Not authorized - sending WWW-Authenticate header.')
@ -144,6 +189,7 @@ if __name__ == '__main__':
print("""
Feed URL: %(URL)s/%(FEEDFILE)s
Redirect URL: http://%(HOST)s:%(RPORT)d/%(REDIRECT)s
Timeout URL: %(URL)s/%(TIMEOUT)s
Username: %(USERNAME)s
Password: %(PASSWORD)s
""" % locals())

View File

@ -92,6 +92,8 @@ webencodings==0.5.1
certifi==2020.11.8
mutagen==1.45.1
youtube_dl==2020.11.21.1
requests==2.25.0
PySocks==1.7.1
"
function install_deps {
@ -119,7 +121,6 @@ function install_deps {
mkdir -p ${MINGW_ROOT}/ssl
site_packages=$(build_python -c 'import sys;print(next(c for c in sys.path if "site-packages" in c))')
cp -v ${site_packages}/certifi/cacert.pem ${MINGW_ROOT}/ssl/cert.pem
build_pip uninstall -y certifi
build_pacman --noconfirm -Rdds mingw-w64-"${ARCH}"-python3-pip || true
}