# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (C) 2005-2007 Thomas Perl <thp at perli.net>
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#
# libpodcasts.py -- data classes for gpodder
# thomas perl <thp@perli.net> 20051029
#
# Contains code based on:
# liblocdbwriter.py (2006-01-09)
# liblocdbreader.py (2006-01-10)
#
import gtk
import gobject
import pango
from gpodder import util
from gpodder import opml
from gpodder import cache
from gpodder import services
from liblogger import log
import libgpodder
from os.path import exists
from os.path import basename
import os.path
import os
import glob
import shutil
import sys
import urllib
import urlparse
import time
import threading
from datetime import datetime
from libtagupdate import update_metadata_on_file
from libtagupdate import tagging_supported
from threading import Event
import re
from types import ListType
from email.Utils import mktime_tz
from email.Utils import parsedate_tz
from xml.sax import saxutils
import xml.dom.minidom
import md5
import string
import shelve
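
# global_lock protects read-modify-write access to the downloaded-episodes
# index (the "localdb"); see addDownloadedItem() and delete_episode_by_url()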
global_lock = threading.RLock()

class ChannelSettings(object):
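    """Persistent per-channel settings, stored in a shelve file.

    Settings are keyed by the channel feed URL. Usage sketch
    (keys come from podcastChannel.SETTINGS):

        settings = ChannelSettings.get_settings_by_url( url)
        settings['override_title'] = 'My title'
        ChannelSettings.set_settings_by_url( url, settings)
    """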
storage = shelve.open( libgpodder.gPodderLib().channel_settings_file)
@classmethod
def get_settings_by_url( cls, url):
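        """Return the stored settings dict for url (or an empty dict)."""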
if isinstance( url, unicode):
url = url.encode('utf-8')
log( 'Trying to get settings for %s', url)
        if url in cls.storage:
            return cls.storage[url]
        else:
            return {}
@classmethod
def set_settings_by_url( cls, url, settings):
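        """Store the settings dict for url and sync the shelve file."""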
if isinstance( url, unicode):
url = url.encode('utf-8')
log( 'Saving settings for %s', url)
cls.storage[url] = settings
cls.storage.sync()

class podcastChannel(ListType):
    """holds data for a complete channel"""
    SETTINGS = ('sync_to_devices', 'is_music_channel', 'device_playlist_name', 'override_title', 'username', 'password')
icon_cache = {}
storage = shelve.open( libgpodder.gPodderLib().feed_cache_file)
fc = cache.Cache( storage)
@classmethod
def get_by_url( cls, url, force_update = False, offline = False):
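        """Fetch the (possibly cached) feed for url and build a channel.

        Returns a podcastChannel with one podcastItem per usable entry;
        entries without enclosures or with invalid URLs are skipped.
        """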
if isinstance( url, unicode):
url = url.encode('utf-8')
c = cls.fc.fetch( url, force_update, offline)
channel = podcastChannel( url)
channel.load_settings()
channel.title = c.feed.title
if hasattr( c.feed, 'link'):
channel.link = c.feed.link
if hasattr( c.feed, 'subtitle'):
channel.description = util.remove_html_tags( c.feed.subtitle)
if hasattr( c.feed, 'updated_parsed'):
channel.pubDate = util.updated_parsed_to_rfc2822( c.feed.updated_parsed)
if hasattr( c.feed, 'image'):
if c.feed.image.href:
channel.image = c.feed.image.href
for entry in c.entries:
if not hasattr( entry, 'enclosures'):
log('Skipping entry: %s', entry.get( 'id', '(no id available)'), sender = channel)
continue
episode = None
try:
episode = podcastItem.from_feedparser_entry( entry, channel)
except:
log( 'Cannot instantiate episode: %s. Skipping.', entry.get( 'id', '(no id available)'), sender = channel)
if episode:
channel.append( episode)
channel.sort( reverse = True)
cls.storage.sync()
return channel
@staticmethod
def create_from_dict( d, load_items = True, force_update = False, callback_error = None, offline = False):
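        """Create a channel from a dict with 'url', 'title', 'description'.

        If load_items is set, try to load the full feed first and fall
        back to a metadata-only channel when that fails.
        """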
if load_items:
try:
return podcastChannel.get_by_url( d['url'], force_update = force_update, offline= offline)
except:
callback_error and callback_error( _('Could not load channel feed from URL: %s') % d['url'])
log( 'Cannot load podcastChannel from URL: %s', d['url'])
c = podcastChannel()
for key in ( 'url', 'title', 'description' ):
if key in d:
setattr( c, key, d[key])
c.load_settings()
return c
def __init__( self, url = "", title = "", link = "", description = ""):
self.url = url
self.title = title
self.link = link
self.description = util.remove_html_tags( description)
self.image = None
self.pubDate = ''
# should this channel be synced to devices? (ex: iPod)
self.sync_to_devices = True
        # if this is set to true, device syncing (ex: iPod) should treat this channel as music, not as a podcast
        self.is_music_channel = False
        # the playlist to which this channel is synced when "is_music_channel" is true
        self.device_playlist_name = 'gPodder'
# if set, this overrides the channel-provided title
self.override_title = ''
self.username = ''
self.password = ''
self.__tree_model = None
def get_filename( self):
"""Return the MD5 sum of the channel URL"""
return md5.new( self.url).hexdigest()
filename = property(fget=get_filename)
def get_title( self):
if self.override_title:
return self.override_title
elif not self.__title.strip():
return self.url
else:
return self.__title
def set_title( self, value):
self.__title = value.strip()
title = property(fget=get_title,
fset=set_title)
def set_custom_title( self, custom_title):
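        """Set override_title if custom_title differs from the feed title."""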
custom_title = custom_title.strip()
if custom_title != self.__title:
self.override_title = custom_title
else:
self.override_title = ''
def load_downloaded_episodes( self):
log( 'Loading downloaded episodes for %s', self.url, sender = self, traceback = True)
try:
return LocalDBReader( self.url).read( self.index_file)
except:
return podcastChannel( self.url, self.title, self.link, self.description)
def save_downloaded_episodes( self, channel):
try:
log( 'Setting localdb channel data => %s', self.index_file, sender = self)
LocalDBWriter( self.index_file).write( channel)
except:
log( 'Error writing to localdb: %s', self.index_file, sender = self, traceback = True)
def load_settings( self):
settings = ChannelSettings.get_settings_by_url( self.url)
for key in self.SETTINGS:
            if key in settings:
                setattr( self, key, settings[key])
def save_settings( self):
settings = {}
for key in self.SETTINGS:
settings[key] = getattr( self, key)
ChannelSettings.set_settings_by_url( self.url, settings)
def newest_pubdate_downloaded( self):
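        """Return the pubDate of the newest downloaded episode, or None."""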
gl = libgpodder.gPodderLib()
# Try DownloadHistory's entries first
for episode in self:
if gl.history_is_downloaded( episode.url):
return episode.pubDate
# If nothing found, do pubDate comparison
pubdate = None
for episode in self.load_downloaded_episodes():
pubdate = episode.newer_pubdate( pubdate)
return pubdate
def get_new_episodes( self):
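        """Return episodes that are neither downloaded nor in progress.

        If nothing has been downloaded yet, return the newest
        "default_new" episodes as configured.
        """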
last_pubdate = self.newest_pubdate_downloaded()
gl = libgpodder.gPodderLib()
if not last_pubdate:
return self[0:min(len(self),gl.config.default_new)]
new_episodes = []
for episode in self.get_all_episodes():
# episode is older than newest downloaded
if episode.compare_pubdate( last_pubdate) < 0:
continue
# episode has been downloaded before
if episode.is_downloaded() or gl.history_is_downloaded( episode.url):
continue
# download is currently in progress
if services.download_status_manager.is_download_in_progress( episode.url):
continue
new_episodes.append( episode)
return new_episodes
def can_sort_by_pubdate( self):
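        """Return False if any episode has a non-parseable pubDate."""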
for episode in self:
try:
mktime_tz(parsedate_tz( episode.pubDate))
except:
log('Episode %s has non-parseable pubDate. Sorting disabled.', episode.title)
return False
return True
    def addDownloadedItem( self, item):
        # no multithreaded access; the lock is released in a finally
        # block so that an exception cannot leave it held forever
        global_lock.acquire()
        try:
            downloaded_episodes = self.load_downloaded_episodes()
            already_in_list = item.url in [ episode.url for episode in downloaded_episodes ]
            # only append if not already in list
            if not already_in_list:
                downloaded_episodes.append( item)
                self.save_downloaded_episodes( downloaded_episodes)
            # Update metadata on file (if possible and wanted)
            if libgpodder.gPodderLib().config.update_tags and tagging_supported():
                filename = item.local_filename()
                try:
                    update_metadata_on_file( filename, title = item.title, artist = self.title)
                except:
                    log('Error while calling update_metadata_on_file() :(')
            libgpodder.gPodderLib().history_mark_downloaded( item.url)
            if item.file_type() == 'torrent':
                torrent_filename = item.local_filename()
                destination_filename = util.torrent_filename( torrent_filename)
                libgpodder.gPodderLib().invoke_torrent( item.url, torrent_filename, destination_filename)
            return not already_in_list
        finally:
            global_lock.release()
def is_played(self, item):
return libgpodder.gPodderLib().history_is_played( item.url)
def get_all_episodes( self):
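        """Return a de-duplicated episode list (by GUID, then by URL).

        Episodes already in the localdb take precedence over
        freshly-parsed feed entries.
        """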
episodes = []
added_urls = []
added_guids = []
# go through all episodes (both new and downloaded),
# prefer already-downloaded (in localdb)
for item in [] + self.load_downloaded_episodes() + self:
# skip items with the same guid (if it has a guid)
if item.guid and item.guid in added_guids:
continue
# skip items with the same download url
if item.url in added_urls:
continue
episodes.append( item)
added_urls.append( item.url)
if item.guid:
added_guids.append( item.guid)
episodes.sort( reverse = True)
return episodes
def force_update_tree_model( self):
self.__tree_model = None
def update_model( self):
new_episodes = self.get_new_episodes()
iter = self.tree_model.get_iter_first()
        while iter is not None:
self.iter_set_downloading_columns( self.tree_model, iter, new_episodes)
iter = self.tree_model.iter_next( iter)
@property
def tree_model( self):
if not self.__tree_model:
log('Generating TreeModel for %s', self.url, sender = self)
self.__tree_model = self.items_liststore()
return self.__tree_model
    def iter_set_downloading_columns( self, model, iter, new_episodes = None):
        """Update the status icon (column 4) for the row at iter."""
        # a mutable default argument ([]) is a classic Python pitfall;
        # use None as the default instead (behaviour is unchanged)
        if new_episodes is None:
            new_episodes = []
        url = model.get_value( iter, 0)
        local_filename = model.get_value( iter, 8)
        # "unplayed" is passed on to util.get_tree_icon() to mark
        # episodes that have not been played yet
        unplayed = not libgpodder.gPodderLib().history_is_played( url)
        if os.path.exists( local_filename):
            file_type = util.file_type_by_extension( util.file_extension_from_url( url))
            if file_type == 'audio':
                status_icon = util.get_tree_icon( 'audio-x-generic', unplayed, self.icon_cache)
            elif file_type == 'video':
                status_icon = util.get_tree_icon( 'video-x-generic', unplayed, self.icon_cache)
            elif file_type == 'torrent':
                status_icon = util.get_tree_icon( 'applications-internet', unplayed, self.icon_cache)
            else:
                status_icon = util.get_tree_icon( 'unknown', unplayed, self.icon_cache)
elif services.download_status_manager.is_download_in_progress( url):
status_icon = util.get_tree_icon( gtk.STOCK_GO_DOWN, icon_cache = self.icon_cache)
elif libgpodder.gPodderLib().history_is_downloaded( url):
status_icon = util.get_tree_icon( gtk.STOCK_DELETE, icon_cache = self.icon_cache)
elif url in [ e.url for e in new_episodes ]:
status_icon = util.get_tree_icon( gtk.STOCK_NEW, icon_cache = self.icon_cache)
else:
status_icon = None
model.set( iter, 4, status_icon)
def items_liststore( self):
"""
Return a gtk.ListStore containing episodes for this channel
"""
new_model = gtk.ListStore( gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_BOOLEAN, gtk.gdk.Pixbuf, gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING)
new_episodes = self.get_new_episodes()
for item in self.get_all_episodes():
new_iter = new_model.append( ( item.url, item.title, libgpodder.gPodderLib().format_filesize( item.length), True, None, item.cute_pubdate(), item.one_line_description(), item.description, item.local_filename() ))
self.iter_set_downloading_columns( new_model, new_iter, new_episodes)
return new_model
def find_episode( self, url):
for item in self.get_all_episodes():
if url == item.url:
return item
return None
def get_save_dir(self):
save_dir = os.path.join( libgpodder.gPodderLib().downloaddir, self.filename, '')
# Create save_dir if it does not yet exist
if not util.make_directory( save_dir):
log( 'Could not create save_dir: %s', save_dir, sender = self)
return save_dir
save_dir = property(fget=get_save_dir)
def remove_downloaded( self):
shutil.rmtree( self.save_dir, True)
def get_index_file(self):
# gets index xml filename for downloaded channels list
return os.path.join( self.save_dir, 'index.xml')
index_file = property(fget=get_index_file)
def get_cover_file( self):
# gets cover filename for cover download cache
return os.path.join( self.save_dir, 'cover')
cover_file = property(fget=get_cover_file)
    def delete_episode_by_url(self, url):
        """Delete the downloaded file for url and update the localdb."""
        global_lock.acquire()
        try:
            downloaded_episodes = self.load_downloaded_episodes()
            for episode in self.get_all_episodes():
                if episode.url == url:
                    util.delete_file( episode.local_filename())
                    if episode in downloaded_episodes:
                        downloaded_episodes.remove( episode)
            self.save_downloaded_episodes( downloaded_episodes)
        finally:
            # release the lock even if file deletion raised
            global_lock.release()

class podcastItem(object):
    """holds data for one object in a channel"""
@staticmethod
def from_feedparser_entry( entry, channel):
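        """Build a podcastItem from a feedparser entry.

        When an entry carries several enclosures, prefer the first
        audio/ or video/ one; raise ValueError if no valid URL is found.
        """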
episode = podcastItem( channel)
episode.title = entry.get( 'title', util.get_first_line( util.remove_html_tags( entry.get( 'summary', ''))))
episode.link = entry.get( 'link', '')
episode.description = util.remove_html_tags( entry.get( 'summary', entry.get( 'link', entry.get( 'title', ''))))
episode.guid = entry.get( 'id', '')
if entry.get( 'updated_parsed', None):
episode.pubDate = util.updated_parsed_to_rfc2822( entry.updated_parsed)
if episode.title == '':
            log( 'Warning: episode has no title, adding anyway (the feed is probably buggy)', sender = episode)
        # default to the first enclosure; if there is more than one,
        # prefer the first audio/ or video/ enclosure with a valid URL
        enclosure = entry.enclosures[0]
        if len(entry.enclosures) > 1:
            for e in entry.enclosures:
                if hasattr( e, 'href') and hasattr( e, 'length') and hasattr( e, 'type') and (e.type.startswith('audio/') or e.type.startswith('video/')):
                    if util.normalize_feed_url( e.href) is not None:
                        log( 'Selected enclosure: %s', e.href, sender = episode)
                        enclosure = e
                        break
episode.url = util.normalize_feed_url( enclosure.get( 'href', ''))
if not episode.url:
raise ValueError( 'Episode has an invalid URL')
if hasattr( enclosure, 'length'):
episode.length = enclosure.length
if hasattr( enclosure, 'type'):
episode.mimetype = enclosure.type
if episode.title == '':
( filename, extension ) = os.path.splitext( os.path.basename( episode.url))
episode.title = filename
return episode
def __init__( self, channel):
self.url = ''
self.title = ''
self.length = 0
self.mimetype = 'application/octet-stream'
self.guid = ''
self.description = ''
self.link = ''
self.channel = channel
self.pubDate = ''
def one_line_description( self):
lines = self.description.strip().splitlines()
if not lines or lines[0] == '':
return _('No description available')
else:
desc = lines[0].strip()
if len( desc) > 84:
return desc[:80] + '...'
else:
return desc
def is_downloaded( self):
return os.path.exists( self.local_filename())
def local_filename( self):
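        """Return the download path: MD5 of the URL plus the file extension."""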
extension = util.file_extension_from_url( self.url)
return os.path.join( self.channel.save_dir, md5.new( self.url).hexdigest() + extension)
def sync_filename( self):
if libgpodder.gPodderLib().config.custom_sync_name_enabled:
return util.object_string_formatter( libgpodder.gPodderLib().config.custom_sync_name, episode = self, channel = self.channel)
else:
return self.title
def file_type( self):
return util.file_type_by_extension( util.file_extension_from_url( self.url))
@property
def basename( self):
return os.path.splitext( os.path.basename( self.url))[0]
@property
def published( self):
try:
return datetime.fromtimestamp( mktime_tz( parsedate_tz( self.pubDate))).strftime('%Y%m%d')
except:
log( 'Cannot format pubDate for "%s".', self.title, sender = self)
return '00000000'
def __cmp__( self, other):
try:
timestamp_self = int(mktime_tz( parsedate_tz( self.pubDate)))
timestamp_other = int(mktime_tz( parsedate_tz( other.pubDate)))
except:
            # if either pubDate cannot be parsed, report "less than";
            # this also makes comparisons with None behave sanely
            # (item != None -> True)
return -1
return timestamp_self - timestamp_other
def compare_pubdate( self, pubdate):
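        """Compare self.pubDate to pubdate; <0, 0 or >0, like cmp()."""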
try:
timestamp_self = int(mktime_tz( parsedate_tz( self.pubDate)))
except:
return -1
try:
timestamp_other = int(mktime_tz( parsedate_tz( pubdate)))
except:
return 1
return timestamp_self - timestamp_other
def newer_pubdate( self, pubdate = None):
if self.compare_pubdate( pubdate) > 0:
return self.pubDate
else:
return pubdate
def cute_pubdate( self):
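        """Return a friendly date: Today, Yesterday, weekday or locale date."""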
seconds_in_a_day = 86400
try:
timestamp = int(mktime_tz( parsedate_tz( self.pubDate)))
except:
return _("(unknown)")
diff = int((time.time()+1)/seconds_in_a_day) - int(timestamp/seconds_in_a_day)
if diff == 0:
return _("Today")
if diff == 1:
return _("Yesterday")
if diff < 7:
return str(datetime.fromtimestamp( timestamp).strftime( "%A"))
return str(datetime.fromtimestamp( timestamp).strftime( "%x"))
def calculate_filesize( self):
try:
self.length = str(os.path.getsize( self.local_filename()))
except:
log( 'Could not get filesize for %s.', self.url)
def equals( self, other_item):
        if other_item is None:
return False
return self.url == other_item.url

def channelsToModel( channels):
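    """Build a gtk.ListStore with one row per channel.

    Columns: 0=url, 1=title, 2=episode count, 3=new-episodes text,
    4=pango weight, 5=new-episode count as string, 6=position,
    7=markup (title + first description line), 8=cover pixbuf
    """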
new_model = gtk.ListStore( gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_INT, gobject.TYPE_STRING, gobject.TYPE_INT, gobject.TYPE_STRING, gobject.TYPE_INT, gobject.TYPE_STRING, gtk.gdk.Pixbuf)
pos = 0
for channel in channels:
new_episodes = channel.get_new_episodes()
count = len(channel)
count_new = len(new_episodes)
new_iter = new_model.append()
new_model.set( new_iter, 0, channel.url)
new_model.set( new_iter, 1, channel.title)
new_model.set( new_iter, 2, count)
if count_new == 0:
new_model.set( new_iter, 3, '')
elif count_new == 1:
new_model.set( new_iter, 3, _('New episode: %s') % ( new_episodes[-1].title ) + ' ')
else:
new_model.set( new_iter, 3, _('%s new episodes') % count_new + ' ')
if count_new:
new_model.set( new_iter, 4, pango.WEIGHT_BOLD)
new_model.set( new_iter, 5, str(count_new))
else:
new_model.set( new_iter, 4, pango.WEIGHT_NORMAL)
new_model.set( new_iter, 5, '')
new_model.set( new_iter, 6, pos)
new_model.set( new_iter, 7, '%s\n<small>%s</small>' % ( saxutils.escape( channel.title), saxutils.escape( channel.description.split('\n')[0]), ))
channel_cover_found = False
if os.path.exists( channel.cover_file) and os.path.getsize(channel.cover_file) > 0:
try:
new_model.set( new_iter, 8, gtk.gdk.pixbuf_new_from_file_at_size( channel.cover_file, 32, 32))
channel_cover_found = True
except:
exctype, value = sys.exc_info()[:2]
log( 'Could not convert icon file "%s", error was "%s"', channel.cover_file, value )
if not channel_cover_found:
iconsize = gtk.icon_size_from_name('channel-icon')
if not iconsize:
iconsize = gtk.icon_size_register('channel-icon',32,32)
icon_theme = gtk.icon_theme_get_default()
globe_icon_name = 'applications-internet'
try:
new_model.set( new_iter, 8, icon_theme.load_icon(globe_icon_name, iconsize, 0))
except:
log( 'Cannot load "%s" icon (using an old or incomplete icon theme?)', globe_icon_name)
new_model.set( new_iter, 8, None)
pos = pos + 1
return new_model

def load_channels( load_items = True, force_update = False, callback_proc = None, callback_url = None, callback_error = None, offline = False):
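    """Load all subscribed channels from the OPML file.

    The optional callbacks report progress (callback_proc), the URL
    currently being loaded (callback_url) and load errors
    (callback_error). Usage sketch:

        channels = load_channels( offline = True)
        save_channels( channels)
    """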
importer = opml.Importer( libgpodder.gPodderLib().channel_opml_file)
result = []
count = 0
for item in importer.items:
callback_proc and callback_proc( count, len( importer.items))
callback_url and callback_url( item['url'])
result.append( podcastChannel.create_from_dict( item, load_items = load_items, force_update = force_update, callback_error = callback_error, offline = offline))
count += 1
return result

def save_channels( channels):
exporter = opml.Exporter( libgpodder.gPodderLib().channel_opml_file)
exporter.write( channels)

class LocalDBReader( object):
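    """Read a channel's downloaded-episodes index ("localdb") XML file."""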
def __init__( self, url):
self.url = url
def get_text( self, nodelist):
return ''.join( [ node.data for node in nodelist if node.nodeType == node.TEXT_NODE ])
def get_text_by_first_node( self, element, name):
return self.get_text( element.getElementsByTagName( name)[0].childNodes)
def get_episode_from_element( self, channel, element):
episode = podcastItem( channel)
episode.title = self.get_text_by_first_node( element, 'title')
episode.description = self.get_text_by_first_node( element, 'description')
episode.url = self.get_text_by_first_node( element, 'url')
episode.link = self.get_text_by_first_node( element, 'link')
episode.guid = self.get_text_by_first_node( element, 'guid')
episode.pubDate = self.get_text_by_first_node( element, 'pubDate')
episode.calculate_filesize()
return episode
def load_and_clean( self, filename):
"""
        Clean up a LocalDB XML file that could potentially contain
"unbound prefix" XML elements (generated by the old print-based
LocalDB code). The code removes those lines to make the new
DOM parser happy.
This should be removed in a future version.
"""
lines = []
for line in open(filename).read().split('\n'):
if not line.startswith('<gpodder:info'):
lines.append( line)
return '\n'.join( lines)
def read( self, filename):
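        """Parse the given file and return a podcastChannel with episodes."""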
doc = xml.dom.minidom.parseString( self.load_and_clean( filename))
rss = doc.getElementsByTagName('rss')[0]
channel_element = rss.getElementsByTagName('channel')[0]
channel = podcastChannel( url = self.url)
channel.title = self.get_text_by_first_node( channel_element, 'title')
channel.description = self.get_text_by_first_node( channel_element, 'description')
channel.link = self.get_text_by_first_node( channel_element, 'link')
channel.load_settings()
for episode_element in rss.getElementsByTagName('item'):
episode = self.get_episode_from_element( channel, episode_element)
channel.append( episode)
return channel

class LocalDBWriter(object):
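    """Write a channel's downloaded episodes back to a localdb XML file."""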
def __init__( self, filename):
self.filename = filename
def create_node( self, doc, name, content):
node = doc.createElement( name)
node.appendChild( doc.createTextNode( content))
return node
def create_item( self, doc, episode):
item = doc.createElement( 'item')
item.appendChild( self.create_node( doc, 'title', episode.title))
item.appendChild( self.create_node( doc, 'description', episode.description))
item.appendChild( self.create_node( doc, 'url', episode.url))
item.appendChild( self.create_node( doc, 'link', episode.link))
item.appendChild( self.create_node( doc, 'guid', episode.guid))
item.appendChild( self.create_node( doc, 'pubDate', episode.pubDate))
return item
def write( self, channel):
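        """Serialize the channel's downloaded episodes to self.filename.

        Returns True on success, False if the file could not be written.
        """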
doc = xml.dom.minidom.Document()
rss = doc.createElement( 'rss')
rss.setAttribute( 'version', '1.0')
doc.appendChild( rss)
        channel_element = doc.createElement( 'channel')
        channel_element.appendChild( self.create_node( doc, 'title', channel.title))
        channel_element.appendChild( self.create_node( doc, 'description', channel.description))
        channel_element.appendChild( self.create_node( doc, 'link', channel.link))
        rss.appendChild( channel_element)
for episode in channel:
if episode.is_downloaded():
rss.appendChild( self.create_item( doc, episode))
try:
fp = open( self.filename, 'w')
fp.write( doc.toxml( encoding = 'utf-8'))
fp.close()
except:
log( 'Could not open file for writing: %s', self.filename, sender = self)
return False
return True