gpodder/src/gpodder/schema.py

339 lines
11 KiB
Python

# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2018 The gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# gpodder.schema - Database schema update and migration facility
# Thomas Perl <thp@gpodder.org>; 2011-02-01
import logging
import shutil
import time
from sqlite3 import dbapi2 as sqlite
from gpodder import util
logger = logging.getLogger(__name__)
EpisodeColumns = (
'podcast_id',
'title',
'description',
'url',
'published',
'guid',
'link',
'file_size',
'mime_type',
'state',
'is_new',
'archive',
'download_filename',
'total_time',
'current_position',
'current_position_updated',
'last_playback',
'payment_url',
'description_html',
'episode_art_url',
'chapters',
)
PodcastColumns = (
'title',
'url',
'link',
'description',
'cover_url',
'auth_username',
'auth_password',
'http_last_modified',
'http_etag',
'auto_archive_episodes',
'download_folder',
'pause_subscription',
'section',
'payment_url',
'download_strategy',
'sync_to_mp3_player',
'cover_thumb',
)
CURRENT_VERSION = 8
# SQL commands to upgrade old database versions to new ones
# Each item is a tuple (old_version, new_version, sql_commands) that should be
# applied to the database to migrate from old_version to new_version.
UPGRADE_SQL = [
# Version 2: Section labels for the podcast list
(1, 2, """
ALTER TABLE podcast ADD COLUMN section TEXT NOT NULL DEFAULT ''
"""),
# Version 3: Flattr integration (+ invalidate http_* fields to force
# a feed update, so that payment URLs are parsed during the next check)
(2, 3, """
ALTER TABLE podcast ADD COLUMN payment_url TEXT NULL DEFAULT NULL
ALTER TABLE episode ADD COLUMN payment_url TEXT NULL DEFAULT NULL
UPDATE podcast SET http_last_modified=NULL, http_etag=NULL
"""),
# Version 4: Per-podcast download strategy management
(3, 4, """
ALTER TABLE podcast ADD COLUMN download_strategy INTEGER NOT NULL DEFAULT 0
"""),
# Version 5: Per-podcast MP3 player device synchronization option
(4, 5, """
ALTER TABLE podcast ADD COLUMN sync_to_mp3_player INTEGER NOT NULL DEFAULT 1
"""),
# Version 6: Add thumbnail for cover art
(5, 6, """
ALTER TABLE podcast ADD COLUMN cover_thumb BLOB NULL DEFAULT NULL
"""),
# Version 7: Add HTML description
(6, 7, """
ALTER TABLE episode ADD COLUMN description_html TEXT NOT NULL DEFAULT ''
UPDATE episode SET description_html=description WHERE is_html(description)
UPDATE episode SET description=remove_html_tags(description_html) WHERE is_html(description)
UPDATE podcast SET http_last_modified=NULL, http_etag=NULL
"""),
# Version 8: Add episode thumbnail URL and chapters
(7, 8, """
ALTER TABLE episode ADD COLUMN episode_art_url TEXT NULL DEFAULT NULL
ALTER TABLE episode ADD COLUMN chapters TEXT NULL DEFAULT NULL
UPDATE podcast SET http_last_modified=NULL, http_etag=NULL
"""),
]
def initialize_database(db):
# Create table for podcasts
db.execute("""
CREATE TABLE podcast (
id INTEGER PRIMARY KEY NOT NULL,
title TEXT NOT NULL DEFAULT '',
url TEXT NOT NULL DEFAULT '',
link TEXT NOT NULL DEFAULT '',
description TEXT NOT NULL DEFAULT '',
cover_url TEXT NULL DEFAULT NULL,
auth_username TEXT NULL DEFAULT NULL,
auth_password TEXT NULL DEFAULT NULL,
http_last_modified TEXT NULL DEFAULT NULL,
http_etag TEXT NULL DEFAULT NULL,
auto_archive_episodes INTEGER NOT NULL DEFAULT 0,
download_folder TEXT NOT NULL DEFAULT '',
pause_subscription INTEGER NOT NULL DEFAULT 0,
section TEXT NOT NULL DEFAULT '',
payment_url TEXT NULL DEFAULT NULL,
download_strategy INTEGER NOT NULL DEFAULT 0,
sync_to_mp3_player INTEGER NOT NULL DEFAULT 1,
cover_thumb BLOB NULL DEFAULT NULL
)
""")
INDEX_SQL = """
CREATE UNIQUE INDEX idx_podcast_url ON podcast (url)
CREATE UNIQUE INDEX idx_podcast_download_folder ON podcast (download_folder)
"""
for sql in INDEX_SQL.strip().split('\n'):
db.execute(sql)
# Create table for episodes
db.execute("""
CREATE TABLE episode (
id INTEGER PRIMARY KEY NOT NULL,
podcast_id INTEGER NOT NULL,
title TEXT NOT NULL DEFAULT '',
description TEXT NOT NULL DEFAULT '',
url TEXT NOT NULL,
published INTEGER NOT NULL DEFAULT 0,
guid TEXT NOT NULL,
link TEXT NOT NULL DEFAULT '',
file_size INTEGER NOT NULL DEFAULT 0,
mime_type TEXT NOT NULL DEFAULT 'application/octet-stream',
state INTEGER NOT NULL DEFAULT 0,
is_new INTEGER NOT NULL DEFAULT 0,
archive INTEGER NOT NULL DEFAULT 0,
download_filename TEXT NULL DEFAULT NULL,
total_time INTEGER NOT NULL DEFAULT 0,
current_position INTEGER NOT NULL DEFAULT 0,
current_position_updated INTEGER NOT NULL DEFAULT 0,
last_playback INTEGER NOT NULL DEFAULT 0,
payment_url TEXT NULL DEFAULT NULL,
description_html TEXT NOT NULL DEFAULT '',
episode_art_url TEXT NULL DEFAULT NULL,
chapters TEXT NULL DEFAULT NULL
)
""")
INDEX_SQL = """
CREATE INDEX idx_episode_podcast_id ON episode (podcast_id)
CREATE UNIQUE INDEX idx_episode_download_filename ON episode (podcast_id, download_filename)
CREATE UNIQUE INDEX idx_episode_guid ON episode (podcast_id, guid)
CREATE INDEX idx_episode_state ON episode (state)
CREATE INDEX idx_episode_is_new ON episode (is_new)
CREATE INDEX idx_episode_archive ON episode (archive)
CREATE INDEX idx_episode_published ON episode (published)
"""
for sql in INDEX_SQL.strip().split('\n'):
db.execute(sql)
# Create table for version info / metadata + insert initial data
db.execute("""CREATE TABLE version (version integer)""")
db.execute("INSERT INTO version (version) VALUES (%d)" % CURRENT_VERSION)
db.commit()
def upgrade(db, filename):
if not list(db.execute('PRAGMA table_info(version)')):
initialize_database(db)
return
db.create_function('is_html', 1, util.is_html)
db.create_function('remove_html_tags', 1, util.remove_html_tags)
version = db.execute('SELECT version FROM version').fetchone()[0]
if version == CURRENT_VERSION:
return
# We are trying an upgrade - save the current version of the DB
backup = '%s_upgraded-v%d_%d' % (filename, int(version), int(time.time()))
try:
shutil.copy(filename, backup)
except Exception as e:
raise Exception('Cannot create DB backup before upgrade: ' + e)
db.execute("DELETE FROM version")
for old_version, new_version, upgrade in UPGRADE_SQL:
if version == old_version:
for sql in upgrade.strip().split('\n'):
db.execute(sql)
version = new_version
assert version == CURRENT_VERSION
db.execute("INSERT INTO version (version) VALUES (%d)" % version)
db.commit()
if version != CURRENT_VERSION:
raise Exception('Database schema version unknown')
def convert_gpodder2_db(old_db, new_db):
"""Convert gPodder 2.x databases to the new format
Both arguments should be SQLite3 connections to the
corresponding databases.
"""
old_db = sqlite.connect(old_db)
new_db_filename = new_db
new_db = sqlite.connect(new_db)
upgrade(new_db, new_db_filename)
# Copy data for podcasts
old_cur = old_db.cursor()
columns = [x[1] for x in old_cur.execute('PRAGMA table_info(channels)')]
for row in old_cur.execute('SELECT * FROM channels'):
row = dict(list(zip(columns, row)))
values = (
row['id'],
row['override_title'] or row['title'],
row['url'],
row['link'],
row['description'],
row['image'],
row['username'] or None,
row['password'] or None,
row['last_modified'] or None,
row['etag'] or None,
row['channel_is_locked'],
row['foldername'],
not row['feed_update_enabled'],
'',
None,
0,
row['sync_to_devices'],
None,
)
new_db.execute("""
INSERT INTO podcast VALUES (%s)
""" % ', '.join('?' * len(values)), values)
old_cur.close()
# Copy data for episodes
old_cur = old_db.cursor()
columns = [x[1] for x in old_cur.execute('PRAGMA table_info(episodes)')]
for row in old_cur.execute('SELECT * FROM episodes'):
row = dict(list(zip(columns, row)))
values = (
row['id'],
row['channel_id'],
row['title'],
row['description'],
row['url'],
row['pubDate'],
row['guid'],
row['link'],
row['length'],
row['mimetype'],
row['state'],
not row['played'],
row['locked'],
row['filename'],
row['total_time'],
row['current_position'],
row['current_position_updated'],
0,
None,
'',
None,
None,
)
new_db.execute("""
INSERT INTO episode VALUES (%s)
""" % ', '.join('?' * len(values)), values)
# do 6 -> 7 upgrade (description_html)
new_db.create_function('is_html', 1, util.is_html)
new_db.create_function('remove_html_tags', 1, util.remove_html_tags)
new_db.execute("UPDATE episode SET description_html=description WHERE is_html(description)")
new_db.execute("UPDATE episode SET description=remove_html_tags(description_html) WHERE is_html(description)")
new_db.execute("UPDATE podcast SET http_last_modified=NULL, http_etag=NULL")
old_cur.close()
old_db.close()
new_db.commit()
new_db.close()
def check_data(db):
# All episodes must be assigned to a podcast
orphan_episodes = db.get('SELECT COUNT(id) FROM episode '
'WHERE podcast_id NOT IN (SELECT id FROM podcast)')
if orphan_episodes > 0:
logger.error('Orphaned episodes found in database')