mirror of
git://git.savannah.gnu.org/guix/data-service.git
synced 2023-12-14 03:23:03 +01:00
7251c7d653
Now that squee cooperates with suspendable ports, this is unnecessary. Use a connection pool to still support running queries in parallel using multiple connections.
642 lines
19 KiB
Scheme
642 lines
19 KiB
Scheme
;;; Guix Data Service -- Information about Guix over time
|
|
;;; Copyright © 2020 Christopher Baines <mail@cbaines.net>
|
|
;;;
|
|
;;; This program is free software: you can redistribute it and/or
|
|
;;; modify it under the terms of the GNU Affero General Public License
|
|
;;; as published by the Free Software Foundation, either version 3 of
|
|
;;; the License, or (at your option) any later version.
|
|
;;;
|
|
;;; This program is distributed in the hope that it will be useful,
|
|
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
;;; Affero General Public License for more details.
|
|
;;;
|
|
;;; You should have received a copy of the GNU Affero General Public
|
|
;;; License along with this program. If not, see
|
|
;;; <http://www.gnu.org/licenses/>.
|
|
|
|
(define-module (guix-data-service data-deletion)
|
|
#:use-module (srfi srfi-1)
|
|
#:use-module (srfi srfi-11)
|
|
#:use-module (ice-9 match)
|
|
#:use-module (ice-9 threads)
|
|
#:use-module (squee)
|
|
#:use-module (fibers)
|
|
#:use-module (guix-data-service utils)
|
|
#:use-module (guix-data-service database)
|
|
#:use-module (guix-data-service model git-branch)
|
|
#:use-module (guix-data-service model package-derivation-by-guix-revision-range)
|
|
#:export (delete-guix-revisions
|
|
delete-data-for-branch
|
|
delete-revisions-from-branch-except-most-recent-n
|
|
delete-revisions-for-all-branches-except-most-recent-n
|
|
delete-data-for-all-deleted-branches
|
|
delete-unreferenced-derivations))
|
|
|
|
(define (delete-guix-revisions conn git-repository-id commits)
|
|
(define (delete-unreferenced-package-derivations)
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM package_derivations
|
|
WHERE NOT EXISTS (
|
|
SELECT 1
|
|
FROM guix_revision_package_derivations
|
|
WHERE guix_revision_package_derivations.package_derivation_id =
|
|
package_derivations.id
|
|
)"))
|
|
|
|
(define (delete-unreferenced-lint-warnings)
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM lint_warnings
|
|
WHERE NOT EXISTS (
|
|
SELECT 1
|
|
FROM guix_revision_lint_warnings
|
|
WHERE guix_revision_lint_warnings.lint_warning_id =
|
|
lint_warnings.id
|
|
)"))
|
|
|
|
(define (delete-unreferenced-lint-checkers)
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM lint_checkers
|
|
WHERE NOT EXISTS (
|
|
SELECT 1
|
|
FROM guix_revision_lint_checkers
|
|
WHERE guix_revision_lint_checkers.lint_checker_id =
|
|
lint_checkers.id
|
|
)"))
|
|
|
|
(let ((guix-revision-ids
|
|
(map
|
|
car
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
SELECT guix_revisions.id
|
|
FROM (VALUES "
|
|
(string-join
|
|
(map (lambda (commit)
|
|
(string-append "('" commit "')"))
|
|
commits)
|
|
", ")
|
|
") AS commits
|
|
INNER JOIN guix_revisions
|
|
ON guix_revisions.commit = commits.column1
|
|
WHERE guix_revisions.git_repository_id = "
|
|
(number->string git-repository-id) " AND
|
|
commits.column1 NOT IN (
|
|
SELECT commit
|
|
FROM git_commits
|
|
)")))))
|
|
|
|
(unless (null? guix-revision-ids)
|
|
(for-each
|
|
(lambda (table)
|
|
(exec-query
|
|
conn
|
|
(simple-format
|
|
#f
|
|
"
|
|
DELETE FROM ~A WHERE ~A IN (VALUES ~A)"
|
|
table
|
|
(if (string=? table
|
|
"guix_revision_package_derivations")
|
|
"revision_id"
|
|
"guix_revision_id")
|
|
(string-join
|
|
(map (lambda (guix-revision-id)
|
|
(string-append "(" guix-revision-id ")"))
|
|
guix-revision-ids)
|
|
", "))))
|
|
'("channel_instances"
|
|
"guix_revision_channel_news_entries"
|
|
"guix_revision_lint_checkers"
|
|
"guix_revision_lint_warnings"
|
|
"guix_revision_package_derivations"
|
|
"guix_revision_system_test_derivations"
|
|
"guix_revision_package_derivation_distribution_counts"))
|
|
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
DELETE FROM guix_revisions
|
|
WHERE id IN ("
|
|
(string-join guix-revision-ids ", ")
|
|
")
|
|
AND id NOT IN (
|
|
SELECT guix_revisions.id
|
|
FROM guix_revisions
|
|
INNER JOIN git_branches ON
|
|
git_branches.git_repository_id = guix_revisions.git_repository_id
|
|
INNER JOIN git_commits ON
|
|
git_commits.git_branch_id = git_branches.id AND
|
|
git_commits.commit = guix_revisions.commit
|
|
)"))
|
|
|
|
(delete-unreferenced-package-derivations)
|
|
(delete-unreferenced-lint-warnings)
|
|
(delete-unreferenced-lint-checkers))))
|
|
|
|
(define (delete-revisions-from-branch conn git-repository-id branch-name commits)
|
|
(define (delete-jobs conn)
|
|
(for-each
|
|
(lambda (table)
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
DELETE FROM " table "
|
|
WHERE job_id IN (
|
|
SELECT id
|
|
FROM load_new_guix_revision_jobs
|
|
WHERE git_repository_id = " (number->string git-repository-id) " AND
|
|
commit IN ("
|
|
(string-join
|
|
(map (lambda (commit)
|
|
(string-append "'" commit "'"))
|
|
commits)
|
|
", ")
|
|
")
|
|
)")))
|
|
'("load_new_guix_revision_job_events"
|
|
"load_new_guix_revision_job_logs"))
|
|
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
DELETE FROM load_new_guix_revision_jobs
|
|
WHERE git_repository_id = " (number->string git-repository-id) " AND
|
|
commit IN ("
|
|
(string-join
|
|
(map (lambda (commit)
|
|
(string-append "'" commit "'"))
|
|
commits)
|
|
", ")
|
|
")")))
|
|
|
|
(define (delete-from-git-commits conn)
|
|
(exec-query
|
|
conn
|
|
(simple-format
|
|
#f
|
|
"
|
|
DELETE FROM git_commits
|
|
WHERE id IN (
|
|
SELECT git_commits.id
|
|
FROM git_commits
|
|
INNER JOIN git_branches
|
|
ON git_branches.id = git_commits.git_branch_id
|
|
WHERE git_branches.git_repository_id = ~A
|
|
AND git_branches.name = '~A'
|
|
AND git_commits.commit IN (~A)
|
|
)"
|
|
git-repository-id
|
|
branch-name
|
|
(string-join
|
|
(map (lambda (commit)
|
|
(string-append "'" commit "'"))
|
|
commits)
|
|
", "))))
|
|
|
|
(catch 'psql-query-error
|
|
(lambda ()
|
|
(with-postgresql-transaction
|
|
conn
|
|
(lambda (conn)
|
|
(obtain-advisory-transaction-lock
|
|
conn
|
|
'delete-revisions-from-branch)
|
|
|
|
(exec-query conn "SET LOCAL lock_timeout = '5s';")
|
|
|
|
(delete-from-git-commits conn)
|
|
(delete-jobs conn)
|
|
|
|
(let ((git-branch-id
|
|
(git-branch-for-repository-and-name conn
|
|
git-repository-id
|
|
branch-name)))
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
DROP TABLE IF EXISTS package_derivations_by_guix_revision_range_git_branch_"
|
|
(number->string git-branch-id) ";")))
|
|
|
|
(delete-guix-revisions conn git-repository-id commits))))
|
|
(lambda (key . args)
|
|
(simple-format
|
|
(current-error-port)
|
|
"error when attempting to delete revisions from branch: ~A ~A\n"
|
|
key args)
|
|
|
|
(apply throw key args))))
|
|
|
|
(define (delete-data-for-branch conn git-repository-id branch-name)
|
|
(define commits
|
|
(map car
|
|
(exec-query conn
|
|
"
|
|
SELECT git_commits.commit
|
|
FROM git_branches
|
|
INNER JOIN git_commits
|
|
ON git_branches.id = git_commits.git_branch_id
|
|
WHERE git_repository_id = $1
|
|
AND git_branches.name = $2"
|
|
(list (number->string git-repository-id)
|
|
branch-name))))
|
|
|
|
(delete-revisions-from-branch conn
|
|
git-repository-id
|
|
branch-name
|
|
commits)
|
|
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM git_branches WHERE name = $1 AND git_repository_id = $2"
|
|
(list branch-name
|
|
(number->string git-repository-id))))
|
|
|
|
(define (delete-revisions-from-branch-except-most-recent-n conn
|
|
git-repository-id
|
|
branch-name
|
|
n)
|
|
(define commits
|
|
(map car
|
|
(exec-query conn
|
|
"
|
|
SELECT commit
|
|
FROM git_commits
|
|
INNER JOIN git_branches
|
|
ON git_branches.id = git_commits.git_branch_id
|
|
WHERE git_repository_id = $1 AND name = $2
|
|
ORDER BY datetime DESC
|
|
OFFSET $3"
|
|
(list (number->string git-repository-id)
|
|
branch-name
|
|
(number->string n)))))
|
|
|
|
(unless (null? commits)
|
|
(simple-format #t "deleting ~A commits from ~A\n" (length commits) branch-name)
|
|
(delete-revisions-from-branch conn
|
|
git-repository-id
|
|
branch-name
|
|
commits)
|
|
|
|
(simple-format #t "repopulating package_derivations_by_guix_revision_range\n")
|
|
(insert-guix-revision-package-derivation-entries conn
|
|
(number->string
|
|
git-repository-id)
|
|
branch-name)))
|
|
|
|
(define (delete-revisions-for-all-branches-except-most-recent-n n)
|
|
(with-postgresql-connection
|
|
"data-deletion"
|
|
(lambda (conn)
|
|
(for-each
|
|
(match-lambda
|
|
((git-repository-id branch-name)
|
|
(delete-revisions-from-branch-except-most-recent-n
|
|
conn
|
|
(string->number git-repository-id)
|
|
branch-name
|
|
n)))
|
|
(exec-query
|
|
conn
|
|
"
|
|
SELECT DISTINCT git_repository_id, name
|
|
FROM git_branches")))))
|
|
|
|
(define (delete-data-for-all-branches-but-master)
|
|
(with-postgresql-connection
|
|
"data-deletion"
|
|
(lambda (conn)
|
|
(for-each
|
|
(lambda (branch-name)
|
|
(delete-data-for-branch conn 1 branch-name))
|
|
(map
|
|
car
|
|
(exec-query
|
|
conn
|
|
"
|
|
SELECT DISTINCT name
|
|
FROM git_branches
|
|
WHERE git_repository_id = 1 AND name != 'master'"))))))
|
|
|
|
(define (delete-data-for-all-deleted-branches)
|
|
(with-postgresql-connection
|
|
"data-deletion"
|
|
(lambda (conn)
|
|
(for-each
|
|
(match-lambda
|
|
((name git-repository-id)
|
|
(simple-format #t "deleting data for ~A (~A)\n"
|
|
name git-repository-id)
|
|
(delete-data-for-branch conn
|
|
(string->number git-repository-id)
|
|
name)))
|
|
(exec-query
|
|
conn
|
|
"
|
|
SELECT name, git_repository_id
|
|
FROM (
|
|
SELECT DISTINCT ON (name, git_repository_id)
|
|
name, git_repository_id, commit
|
|
FROM git_branches
|
|
INNER JOIN git_commits
|
|
ON git_commits.git_branch_id = git_branches.id
|
|
ORDER BY git_repository_id, name, datetime DESC
|
|
) AS git_branches_latest_revision
|
|
WHERE commit = ''")))))
|
|
|
|
(define* (delete-unreferenced-derivations #:key
|
|
(batch-size 100000))
|
|
(define (delete-builds-for-derivation-output-details-set
|
|
conn
|
|
derivation-output-details-set-id)
|
|
(let ((build-ids
|
|
(map car
|
|
(exec-query
|
|
conn
|
|
"
|
|
SELECT id
|
|
FROM builds
|
|
WHERE derivation_output_details_set_id = $1"
|
|
(list derivation-output-details-set-id)))))
|
|
|
|
(unless (null? build-ids)
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
DELETE FROM build_status WHERE build_id IN ("
|
|
(string-join build-ids ",")
|
|
")"))
|
|
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
DELETE FROM latest_build_status WHERE build_id IN ("
|
|
(string-join build-ids ",")
|
|
")"))
|
|
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
DELETE FROM builds WHERE id IN ("
|
|
(string-join build-ids ",")
|
|
")")))))
|
|
|
|
(define (delete-blocked-builds-for-derivation-output-details-set
|
|
conn
|
|
derivation-output-details-set-id)
|
|
;; Do this for each build server individually, as that helps PostgreSQL
|
|
;; efficiently check the partitions
|
|
(let ((build-server-ids
|
|
(map
|
|
car
|
|
(exec-query
|
|
conn
|
|
"SELECT id FROM build_servers"))))
|
|
(for-each
|
|
(lambda (build-server-id)
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM blocked_builds
|
|
WHERE build_server_id = $1
|
|
AND (
|
|
blocked_derivation_output_details_set_id = $2 OR blocking_derivation_output_details_set_id = $3
|
|
)"
|
|
(list build-server-id
|
|
derivation-output-details-set-id
|
|
derivation-output-details-set-id)))
|
|
build-server-ids)))
|
|
|
|
(define (delete-unreferenced-derivations-source-files conn)
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivation_source_files
|
|
WHERE NOT EXISTS (
|
|
SELECT 1
|
|
FROM derivation_sources
|
|
WHERE derivation_source_file_id = derivation_source_files.id
|
|
)"))
|
|
|
|
(define (maybe-delete-derivation conn id)
|
|
(match (map
|
|
car
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivation_outputs WHERE derivation_id = $1
|
|
AND NOT EXISTS (
|
|
SELECT 1
|
|
FROM derivation_inputs
|
|
WHERE derivation_output_id IN (
|
|
SELECT derivation_outputs.id
|
|
FROM derivation_outputs
|
|
WHERE derivation_id = $1
|
|
)
|
|
)
|
|
RETURNING derivation_outputs.derivation_output_details_id"
|
|
(list id)))
|
|
(() 0)
|
|
((derivation-output-details-ids ...)
|
|
|
|
(for-each
|
|
(lambda (derivation-output-details-id)
|
|
(unless (string->number derivation-output-details-id)
|
|
(error
|
|
(simple-format #f "derivation-output-details-id: ~A is not a number"
|
|
derivation-output-details-id)))
|
|
|
|
(match (exec-query
|
|
conn
|
|
"
|
|
SELECT COUNT(*) FROM derivation_outputs
|
|
WHERE derivation_output_details_id = $1"
|
|
(list derivation-output-details-id))
|
|
(((count))
|
|
(when (eq? (string->number count)
|
|
0)
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivation_output_details
|
|
WHERE id = $1"
|
|
(list derivation-output-details-id))))))
|
|
derivation-output-details-ids)
|
|
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivation_sources WHERE derivation_id = $1"
|
|
(list id))
|
|
|
|
(match (exec-query
|
|
conn
|
|
"
|
|
SELECT derivation_output_details_set_id
|
|
FROM derivations_by_output_details_set
|
|
WHERE derivation_id = $1"
|
|
(list id))
|
|
(((derivation-output-details-set-id))
|
|
(match (exec-query
|
|
conn
|
|
"
|
|
SELECT COUNT(*) FROM derivations_by_output_details_set
|
|
WHERE derivation_output_details_set_id = $1"
|
|
(list derivation-output-details-set-id))
|
|
(((count))
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivations_by_output_details_set
|
|
WHERE derivation_id = $1"
|
|
(list id))
|
|
|
|
(when (<= (string->number count)
|
|
1)
|
|
(delete-builds-for-derivation-output-details-set
|
|
conn
|
|
derivation-output-details-set-id)
|
|
|
|
(delete-blocked-builds-for-derivation-output-details-set
|
|
conn
|
|
derivation-output-details-set-id)
|
|
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivation_output_details_sets
|
|
WHERE id = $1"
|
|
(list derivation-output-details-set-id)))))))
|
|
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivation_inputs WHERE derivation_id = $1"
|
|
(list id))
|
|
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivations WHERE id = $1"
|
|
(list id))
|
|
|
|
1)))
|
|
|
|
(define (delete-batch conn connection-pool)
|
|
(let* ((derivations
|
|
(with-time-logging "fetching batch of derivations"
|
|
(map car
|
|
(exec-query
|
|
conn
|
|
"
|
|
SELECT DISTINCT derivation_id
|
|
FROM derivation_outputs
|
|
WHERE NOT EXISTS (
|
|
-- This isn't a perfect check, as this will select some derivations that are
|
|
-- used, but maybe-delete-derivation includes the proper check
|
|
SELECT 1
|
|
FROM derivation_inputs
|
|
WHERE derivation_output_id = derivation_outputs.id
|
|
) AND NOT EXISTS (
|
|
SELECT 1
|
|
FROM package_derivations
|
|
WHERE package_derivations.derivation_id = derivation_outputs.derivation_id
|
|
) AND NOT EXISTS (
|
|
SELECT 1 FROM channel_instances
|
|
WHERE derivation_id = derivation_outputs.derivation_id
|
|
) AND NOT EXISTS (
|
|
SELECT 1 FROM guix_revision_system_test_derivations
|
|
WHERE derivation_id = derivation_outputs.derivation_id
|
|
) LIMIT $1"
|
|
(list (number->string batch-size))))))
|
|
(derivations-count (length derivations)))
|
|
(let ((deleted-count 0))
|
|
(with-time-logging
|
|
(simple-format #f
|
|
"Looking at ~A derivations"
|
|
derivations-count)
|
|
(n-par-for-each
|
|
8
|
|
(lambda (derivation-id)
|
|
(unless (string->number derivation-id)
|
|
(error
|
|
(simple-format #f "derivation-id: ~A is not a number"
|
|
derivation-id)))
|
|
|
|
(let ((val
|
|
(call-with-resource-from-pool connection-pool
|
|
(lambda (conn)
|
|
(catch 'psql-query-error
|
|
(lambda ()
|
|
(with-postgresql-transaction
|
|
conn
|
|
(lambda (conn)
|
|
(exec-query
|
|
conn
|
|
"
|
|
SET CONSTRAINTS derivations_by_output_details_set_derivation_id_fkey DEFERRED")
|
|
|
|
(exec-query conn "SET LOCAL lock_timeout = '5s';")
|
|
|
|
(maybe-delete-derivation conn
|
|
derivation-id))))
|
|
(lambda (key . args)
|
|
(simple-format
|
|
(current-error-port)
|
|
"error when attempting to delete derivation: ~A ~A\n"
|
|
key args)
|
|
|
|
0))))))
|
|
(monitor
|
|
(set! deleted-count
|
|
(+ val deleted-count)))))
|
|
derivations))
|
|
|
|
(simple-format (current-error-port)
|
|
"Deleted ~A derivations\n"
|
|
deleted-count)
|
|
deleted-count)))
|
|
|
|
(run-fibers
|
|
(lambda ()
|
|
(let* ((connection-pool
|
|
(make-resource-pool
|
|
(lambda ()
|
|
(open-postgresql-connection "data-deletion" #f))
|
|
8)))
|
|
|
|
(with-postgresql-connection
|
|
"data-deletion"
|
|
(lambda (conn)
|
|
(obtain-advisory-transaction-lock
|
|
conn
|
|
'delete-unreferenced-derivations)
|
|
|
|
(let loop ((total-deleted 0))
|
|
(let ((batch-deleted-count (delete-batch conn connection-pool)))
|
|
(if (eq? 0 batch-deleted-count)
|
|
(begin
|
|
(with-time-logging
|
|
"Deleting unused derivation_source_files entries"
|
|
(delete-unreferenced-derivations-source-files conn))
|
|
(simple-format
|
|
(current-output-port)
|
|
"Finished deleting derivations, deleted ~A in total\n"
|
|
total-deleted))
|
|
(loop (+ total-deleted batch-deleted-count)))))))))))
|