Better GUID tracking with fallback URL (Maemo bug 12073)

Thanks to Jonas Kölker for the initial patch.
This commit is contained in:
Thomas Perl 2011-04-04 11:43:53 +02:00
parent 0de167bc4e
commit dc9ddb9ac6

View file

@ -164,9 +164,6 @@ class PodcastEpisode(PodcastModelObject):
episode.description = entry.get('subtitle', '')
episode.guid = entry.get('id', '')
if not episode.guid:
# Fallback to the entry URL if there is no GUID
episode.guid = entry.get('link', '')
if entry.get('updated_parsed', None):
episode.published = rfc822.mktime_tz(entry.updated_parsed+(0,))
@ -816,12 +813,27 @@ class PodcastChannel(PodcastModelObject):
# Get the most recent "published" timestamp of all episodes
last_published = self.db.get_last_published(self) or 0
# Keep track of episode GUIDs currently seen in the feed
seen_guids = set()
# Search all entries for new episodes
for entry in entries:
try:
episode = self.EpisodeClass.from_feedparser_entry(entry, self, mimetype_prefs)
if episode is not None and not episode.title:
episode.title, ext = os.path.splitext(os.path.basename(episode.url))
if episode is not None:
if not episode.title:
log('Using filename as title for episode at %s.', \
episode.url, sender=self)
basename = os.path.basename(episode.url)
episode.title, ext = os.path.splitext(basename)
# Maemo bug 12073: use the download URL as GUID if the episode has none
if not episode.guid:
log('Using download URL as GUID for episode %s.', \
episode.title, sender=self)
episode.guid = episode.url
seen_guids.add(episode.guid)
except Exception, e:
log('Cannot instantiate episode: %s. Skipping.', e, sender=self, traceback=True)
continue
@ -856,10 +868,10 @@ class PodcastChannel(PodcastModelObject):
# Remove "unreachable" episodes - episodes that have not been
# downloaded and that the feed does not list as downloadable anymore
if self.id is not None:
seen_guids = set(e.guid for e in feed.entries if hasattr(e, 'guid'))
episodes_to_purge = (e for e in existing if \
e.state != gpodder.STATE_DOWNLOADED and \
e.guid not in seen_guids and e.guid is not None)
e.guid not in seen_guids)
for episode in episodes_to_purge:
log('Episode removed from feed: %s (%s)', episode.title, \
episode.guid, sender=self)