YouTube Updates

The full-size 900x900 YouTube image was never being selected; images were always 100x100 because HTMLParser kept iterating until all matches were found. The 900x900 image was found first, then the 100x100 image, which overwrote it as the final result. Now all results are saved in a list, and the first match is the selected result.

The YouTube channel description and link were not being discovered or saved to the SQLite DB. The description is now retrieved from the YouTube channel webpage in the same manner as the cover image link, and the link is the YouTube channel URL.
This commit is contained in:
Hiltronix 2019-08-19 22:53:34 -05:00
parent 08a0b8255b
commit b49327fe09
2 changed files with 57 additions and 8 deletions

View File

@ -1027,6 +1027,10 @@ class PodcastChannel(PodcastModelObject):
self._consume_updated_title(title)
self.link = link
self.description = description
vid = youtube.get_youtube_id(self.url)
if vid is not None:
self.description = youtube.get_channel_desc(self.url)
self.link = youtube.get_channel_id_url(self.url)
self.cover_url = cover_url
self.payment_url = payment_url
self.save()

View File

@ -242,6 +242,20 @@ def get_real_channel_url(url):
return for_each_feed_pattern(return_user_feed, url, url)
def get_channel_id_url(url):
    """Return the channel page URL for a YouTube feed URL.

    The XML document at *url* is fetched with ``util.urlopen`` and its
    top-level ``channelId`` element (YouTube 2015 schema namespace) is
    used to build ``https://www.youtube.com/channel/<id>``.

    Returns:
        The channel URL string, or None when *url* does not contain
        'youtube.com', when the document cannot be fetched or parsed, or
        when it carries no channel id. A warning is logged on failure.
    """
    if 'youtube.com' not in url:
        return None

    try:
        raw_xml_data = util.urlopen(url).read().decode('utf-8')
        xml_data = xml.etree.ElementTree.fromstring(raw_xml_data)
        # findtext() gives None for a missing element and '' for an empty
        # one, so neither case can end up as a ".../channel/None" URL.
        channel_id = xml_data.findtext(
            '{http://www.youtube.com/xml/schemas/2015}channelId')
        if not channel_id:
            logger.warning('Could not retrieve youtube channel id.')
            return None
        return 'https://www.youtube.com/channel/{}'.format(channel_id)
    except Exception:
        # Best-effort lookup: callers treat None as "not available".
        logger.warning('Could not retrieve youtube channel id.', exc_info=True)
        return None
def get_cover(url):
if 'youtube.com' in url:
@ -249,7 +263,7 @@ def get_cover(url):
"""This custom html parser searches for the youtube channel thumbnail/avatar"""
def __init__(self):
super().__init__()
self.url = ""
self.url = []
def handle_starttag(self, tag, attributes):
attribute_dict = {attribute[0]: attribute[1] for attribute in attributes}
@ -258,29 +272,60 @@ def get_cover(url):
if tag == 'link' \
and 'rel' in attribute_dict \
and attribute_dict['rel'] == 'image_src':
self.url = attribute_dict['href']
self.url.append(attribute_dict['href'])
# Fallback to image that may only be 100x100px.
elif tag == 'img' \
and 'class' in attribute_dict \
and attribute_dict['class'] == "channel-header-profile-image":
self.url = attribute_dict['src']
self.url.append(attribute_dict['src'])
try:
raw_xml_data = util.urlopen(url).read().decode('utf-8')
xml_data = xml.etree.ElementTree.fromstring(raw_xml_data)
channel_id = xml_data.find("{http://www.youtube.com/xml/schemas/2015}channelId").text
channel_url = 'https://www.youtube.com/channel/{}'.format(channel_id)
channel_url = get_channel_id_url(url)
html_data = util.urlopen(channel_url).read().decode('utf-8')
parser = YouTubeHTMLCoverParser()
parser.feed(html_data)
if parser.url:
logger.debug('Youtube cover art for {} is: {}'.format(url, parser.url))
return parser.url
return parser.url[0]
except Exception:
logger.warning('Could not retrieve cover art', exc_info=True)
def get_channel_desc(url):
    """Return the description of the YouTube channel behind *url*.

    The channel page URL is obtained from ``get_channel_id_url``, the page
    is fetched with ``util.urlopen``, and the ``content`` attribute of its
    ``<meta name="description">`` tag is extracted.

    Returns:
        The description string; 'No description available.' when the page
        has no non-empty description meta tag; None when *url* does not
        contain 'youtube.com' or when the lookup fails (a warning is
        logged in that case).
    """
    if 'youtube.com' not in url:
        return None

    class YouTubeHTMLDesc(HTMLParser):
        """This custom html parser searches for the YouTube channel description."""

        def __init__(self):
            super().__init__()
            self.description = ''

        def handle_starttag(self, tag, attributes):
            if tag != 'meta':
                return
            attribute_dict = dict(attributes)
            if attribute_dict.get('name') != 'description':
                return
            # A description tag without a usable content attribute is
            # skipped instead of raising KeyError and aborting the lookup.
            content = attribute_dict.get('content')
            if content:
                self.description = content

    try:
        channel_url = get_channel_id_url(url)
        html_data = util.urlopen(channel_url).read().decode('utf-8')
        parser = YouTubeHTMLDesc()
        parser.feed(html_data)
        if parser.description:
            logger.debug('YouTube description for {} is: {}'.format(url, parser.description))
            return parser.description
        # The original message never filled in its placeholder.
        logger.debug('YouTube description for {} is not provided.'.format(url))
        return 'No description available.'
    except Exception:
        # Best-effort lookup: log and fall through to None.
        logger.warning('Could not retrieve YouTube channel description.', exc_info=True)
        return None
def get_channels_for_user(username, api_key_v3):
# already a channel ID: return videos.xml.