From 7c551ee6aa9ab3045586629c4368f008a6ddd8c0 Mon Sep 17 00:00:00 2001 From: Eric Le Lay Date: Sat, 18 Jul 2020 15:11:44 +0200 Subject: [PATCH] gpodder escapist_videos doesn't work with escapist magazine v2 --- src/gpodder/escapist_videos.py | 183 --------------------------------- src/gpodder/model.py | 10 +- 2 files changed, 4 insertions(+), 189 deletions(-) delete mode 100644 src/gpodder/escapist_videos.py diff --git a/src/gpodder/escapist_videos.py b/src/gpodder/escapist_videos.py deleted file mode 100644 index c60fd59e..00000000 --- a/src/gpodder/escapist_videos.py +++ /dev/null @@ -1,183 +0,0 @@ -# -*- coding: utf-8 -*- -# -# gPodder - A media aggregator and podcast client -# Copyright (c) 2005-2018 The gPodder Team -# -# gPodder is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# gPodder is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>.
-# - -# -# gpodder.escapist - Escapist Videos download magic -# somini ; 2014-09-14 -# - - -import json -import logging -import re -import urllib.error -import urllib.parse -import urllib.request - -import gpodder -from gpodder import registry, util - -logger = logging.getLogger(__name__) - - -# This matches the more reliable URL -ESCAPIST_NUMBER_RE = re.compile(r'http://www.escapistmagazine.com/videos/view/(\d+)', re.IGNORECASE) -# This matches regular URL, mainly those that come in the RSS feeds -ESCAPIST_REGULAR_RE = re.compile(r'http://www.escapistmagazine.com/videos/view/([\w-]+)/(\d+)-', re.IGNORECASE) -# This finds the RSS for a given URL -DATA_RSS_RE = re.compile(r'http://www.escapistmagazine.com/rss/videos/list/([1-9][0-9]*)\.xml') -# This matches the "configuration". The important part is the JSON between the parens -DATA_CONFIG_RE = re.compile(r'imsVideo\.play\((.*)\)\;\<\/script\>', re.IGNORECASE) -# This matches the cover art for an RSS. We shouldn't parse XML with regex. -DATA_COVERART_RE = re.compile(r'(http:.+\.jpg)') - - -class EscapistError(BaseException): pass - - -@registry.download_url.register -def escapist_real_download_url(unused_config, episode): - res = get_real_download_url(episode.url) - return None if res == episode.url else res - - -def get_real_download_url(url): - video_id = get_escapist_id(url) - if video_id is None: - return url - - web_data = get_escapist_web(video_id) - - data_config_frag = DATA_CONFIG_RE.search(web_data) - - data_config_url = get_escapist_config_url(data_config_frag.group(1)) - - if data_config_url is None: - raise EscapistError('Cannot parse configuration from the site') - - logger.debug('Config URL: %s', data_config_url) - - data_config_data = util.urlopen(data_config_url).content.decode('utf-8') - - # TODO: This second argument should get a real name - real_url = get_escapist_real_url(data_config_data, data_config_frag.group(1)) - - if real_url is None: - raise EscapistError('Cannot get MP4 URL from The 
Escapist') - elif "sales-marketing/" in real_url: - raise EscapistError('Oops, seems The Escapist blocked this IP. Wait a few days/weeks to get it unblocked') - else: - return real_url - - -def get_escapist_id(url): - result = ESCAPIST_NUMBER_RE.match(url) - if result is not None: - return result.group(1) - - result = ESCAPIST_REGULAR_RE.match(url) - if result is not None: - return result.group(2) - - return None - - -def is_video_link(url): - return (get_escapist_id(url) is not None) - - -def get_real_channel_url(url): - video_id = get_escapist_id(url) - if video_id is None: - return url - - web_data = get_escapist_web(video_id) - - data_config_frag = DATA_RSS_RE.search(web_data) - if data_config_frag is None: - raise EscapistError('Cannot get RSS URL from The Escapist') - return data_config_frag.group(0) - - -def get_real_cover(url): - rss_url = get_real_channel_url(url) - if rss_url is None: - return None - - rss_data = util.urlopen(rss_url).content.decode('utf-8') - rss_data_frag = DATA_COVERART_RE.search(rss_data) - - if rss_data_frag is None: - return None - - return rss_data_frag.group(1) - - -def get_escapist_web(video_id): - if video_id is None: - return None - - web_url = 'http://www.escapistmagazine.com/videos/view/%s' % video_id - return util.urlopen(web_url).text - - -def get_escapist_config_url(data): - if data is None: - return None - - query_string = urllib.parse.urlencode(json.loads(data)) - - return 'http://www.escapistmagazine.com/videos/vidconfig.php?%s' % query_string - - -def get_escapist_real_url(data, config_json): - if data is None: - return None - - config_data = json.loads(config_json) - if config_data is None: - return None - - # The data is scrambled, unscramble - # Direct port from 'imsVideos.prototype.processRequest' from the file 'ims_videos.min.js' - - one_hash = config_data["hash"] - # Turn the string into numbers - hash_n = [ord(x) for x in one_hash] - # Split the data into 2char strings - hex_hashes = [data[x:(x + 2)] for x in 
range(0, len(data), 2)] - # Turn the strings into numbers, considering the hex value - num_hashes = [int(h, 16) for h in hex_hashes] - # Characters again, from the value - # str_hashes = [ unichr(n) for n in num_hashes ] - - # Bitwise XOR num_hashes and the hash - result_num = [] - for idx in range(0, len(num_hashes)): - result_num.append(num_hashes[idx] ^ hash_n[idx % len(hash_n)]) - - # At last, Numbers back into characters - result = ''.join([chr(x) for x in result_num]) - # A wild JSON appears... - # You use "Master Ball"... - escapist_cfg = json.loads(result) - # It's super effective! - - # TODO: There's a way to choose different video types, for now just pick MP4@480p - return escapist_cfg["files"]["videos"][2]["src"] diff --git a/src/gpodder/model.py b/src/gpodder/model.py index 51b8b8f4..f699428e 100644 --- a/src/gpodder/model.py +++ b/src/gpodder/model.py @@ -38,7 +38,7 @@ import time import podcastparser import gpodder -from gpodder import (coverart, escapist_videos, feedcore, registry, schema, +from gpodder import (coverart, feedcore, registry, schema, util, vimeo, youtube) logger = logging.getLogger(__name__) @@ -198,7 +198,6 @@ class gPodderFetcher(feedcore.Fetcher): def _resolve_url(self, url): url = youtube.get_real_channel_url(url) url = vimeo.get_real_channel_url(url) - url = escapist_videos.get_real_channel_url(url) return url def parse_feed(self, url, data_stream, headers, status, max_episodes=0, **kwargs): @@ -309,7 +308,7 @@ class PodcastEpisode(PodcastModelObject): if not episode.url: return None - if any(mod.is_video_link(episode.url) for mod in (youtube, vimeo, escapist_videos)): + if any(mod.is_video_link(episode.url) for mod in (youtube, vimeo)): return episode # Check if we can resolve this link to a audio/video file @@ -582,7 +581,6 @@ class PodcastEpisode(PodcastModelObject): # Use title for YouTube, Vimeo and Soundcloud downloads if (youtube.is_video_link(self.url) or vimeo.is_video_link(self.url) or - 
escapist_videos.is_video_link(self.url) or episode_filename == 'stream'): episode_filename = self.title @@ -672,7 +670,7 @@ class PodcastEpisode(PodcastModelObject): def file_type(self): # Assume all YouTube/Vimeo links are video files - if youtube.is_video_link(self.url) or vimeo.is_video_link(self.url) or escapist_videos.is_video_link(self.url): + if youtube.is_video_link(self.url) or vimeo.is_video_link(self.url): return 'video' return util.file_type_by_extension(self.extension()) @@ -1209,7 +1207,7 @@ class PodcastChannel(PodcastModelObject): return self.section def _get_content_type(self): - if 'youtube.com' in self.url or 'vimeo.com' in self.url or 'escapistmagazine.com' in self.url: + if 'youtube.com' in self.url or 'vimeo.com' in self.url: return _('Video') audio, video, other = 0, 0, 0