Merge branch 'master' into gtk3

- SoundCloud: handle non-downloadable tracks
- Try to parse feeds even if the Content-Type header says HTML
This commit is contained in:
Eric Le Lay 2017-11-27 22:27:41 +01:00
commit 858497936f
2 changed files with 46 additions and 22 deletions

View File

@ -36,9 +36,11 @@ import urllib.parse
try:
# Python 2
from rfc822 import mktime_tz
from StringIO import StringIO
except ImportError:
# Python 3
from email.utils import mktime_tz
from io import StringIO
class ExceptionWithData(Exception):
@ -171,31 +173,39 @@ class Fetcher(object):
except HTTPError as e:
return self._check_statuscode(e, e.geturl())
if not is_local and stream.headers.get('content-type', '').startswith('text/html'):
if autodiscovery:
ad = FeedAutodiscovery(url)
data = stream
if autodiscovery and not is_local and stream.headers.get('content-type', '').startswith('text/html'):
# Not very robust attempt to detect encoding: http://stackoverflow.com/a/1495675/1072626
charset = stream.headers.get_param('charset')
if charset is None:
charset = 'utf-8' # utf-8 appears hard-coded elsewhere in this codebase
# Not very robust attempt to detect encoding: http://stackoverflow.com/a/1495675/1072626
charset = stream.headers.get_param('charset')
if charset is None:
charset = 'utf-8' # utf-8 appears hard-coded elsewhere in this codebase
# We use StringIO in case the stream needs to be read again
data = StringIO(stream.read().decode(charset))
ad = FeedAutodiscovery(url)
ad.feed(stream.read().decode(charset))
if ad._resolved_url:
try:
self._parse_feed(ad._resolved_url, None, None, False)
return Result(NEW_LOCATION, ad._resolved_url)
except Exception as e:
logger.warn('Feed autodiscovery failed', exc_info=True)
ad.feed(data.getvalue())
if ad._resolved_url:
try:
self._parse_feed(ad._resolved_url, None, None, False)
return Result(NEW_LOCATION, ad._resolved_url)
except Exception as e:
logger.warn('Feed autodiscovery failed', exc_info=True)
# Second, try to resolve the URL
url = self._resolve_url(url)
if url:
return Result(NEW_LOCATION, url)
# Second, try to resolve the URL
url = self._resolve_url(url)
if url:
return Result(NEW_LOCATION, url)
raise InvalidFeed('Got HTML document instead')
# Reset the stream so podcastparser can give it a go
data.seek(0)
try:
feed = podcastparser.parse(url, data)
except ValueError as e:
raise InvalidFeed('Could not parse feed: {msg}'.format(msg=e))
feed = podcastparser.parse(url, stream)
if is_local:
feed['headers'] = {}
return Result(UPDATED_FEED, feed)

View File

@ -30,6 +30,7 @@ from gpodder import util
import json
import logging
import os
import time
@ -42,6 +43,9 @@ import urllib.request, urllib.parse, urllib.error
CONSUMER_KEY = 'zrweghtEtnZLpXf3mlm8mQ'
logger = logging.getLogger(__name__)
def soundcloud_parsedate(s):
"""Parse a string into a unix timestamp
@ -137,9 +141,19 @@ class SoundcloudUser(object):
try:
json_url = 'https://api.soundcloud.com/users/%(user)s/%(feed)s.json?filter=downloadable&consumer_key=%(consumer_key)s&limit=200' \
% { "user":self.get_user_id(), "feed":feed, "consumer_key": CONSUMER_KEY }
logger.debug("loading %s", json_url)
tracks = (track for track in json.loads(util.urlopen(json_url).read().decode('utf-8')) \
if track['downloadable'])
json_tracks = json.loads(util.urlopen(json_url).read().decode('utf-8'))
tracks = [track for track in json_tracks if track['downloadable']]
total_count = len(tracks) + len([track for track in json_tracks
if not track['downloadable']])
if len(tracks) == 0 and total_count > 0:
logger.warn("Download of all %i %s of user %s is disabled" %
(total_count, feed, self.username))
else:
logger.info("%i/%i downloadable tracks for user %s %s feed" %
(len(tracks), total_count, self.username, feed))
for track in tracks:
# Prefer stream URL (MP3), fallback to download URL