data-service/guix-data-service/data-deletion.scm

724 lines
21 KiB
Scheme

;;; Guix Data Service -- Information about Guix over time
;;; Copyright © 2020 Christopher Baines <mail@cbaines.net>
;;;
;;; This program is free software: you can redistribute it and/or
;;; modify it under the terms of the GNU Affero General Public License
;;; as published by the Free Software Foundation, either version 3 of
;;; the License, or (at your option) any later version.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Affero General Public License for more details.
;;;
;;; You should have received a copy of the GNU Affero General Public
;;; License along with this program. If not, see
;;; <http://www.gnu.org/licenses/>.
(define-module (guix-data-service data-deletion)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-11)
#:use-module (ice-9 match)
#:use-module (ice-9 threads)
#:use-module (squee)
#:use-module (fibers)
#:use-module (fibers channels)
#:use-module (guix-data-service utils)
#:use-module (guix-data-service database)
#:use-module (guix-data-service model git-branch)
#:use-module (guix-data-service model package-derivation-by-guix-revision-range)
#:export (delete-guix-revisions
delete-data-for-branch
delete-revisions-from-branch-except-most-recent-n
delete-revisions-for-all-branches-except-most-recent-n
delete-data-for-all-deleted-branches
delete-unreferenced-derivations
delete-nars-for-unknown-store-paths))
(define (delete-guix-revisions conn git-repository-id commits)
(define (delete-unreferenced-package-derivations)
(exec-query
conn
"
DELETE FROM package_derivations
WHERE NOT EXISTS (
SELECT 1
FROM guix_revision_package_derivations
WHERE guix_revision_package_derivations.package_derivation_id =
package_derivations.id
)"))
(define (delete-unreferenced-lint-warnings)
(exec-query
conn
"
DELETE FROM lint_warnings
WHERE NOT EXISTS (
SELECT 1
FROM guix_revision_lint_warnings
WHERE guix_revision_lint_warnings.lint_warning_id =
lint_warnings.id
)"))
(define (delete-unreferenced-lint-checkers)
(exec-query
conn
"
DELETE FROM lint_checkers
WHERE NOT EXISTS (
SELECT 1
FROM guix_revision_lint_checkers
WHERE guix_revision_lint_checkers.lint_checker_id =
lint_checkers.id
)"))
(let ((guix-revision-ids
(map
car
(exec-query
conn
(string-append
"
SELECT guix_revisions.id
FROM (VALUES "
(string-join
(map (lambda (commit)
(string-append "('" commit "')"))
commits)
", ")
") AS commits
INNER JOIN guix_revisions
ON guix_revisions.commit = commits.column1
WHERE guix_revisions.git_repository_id = "
(number->string git-repository-id) " AND
commits.column1 NOT IN (
SELECT commit
FROM git_commits
)")))))
(unless (null? guix-revision-ids)
(for-each
(lambda (table)
(exec-query
conn
(simple-format
#f
"
DELETE FROM ~A WHERE ~A IN (VALUES ~A)"
table
(if (string=? table
"guix_revision_package_derivations")
"revision_id"
"guix_revision_id")
(string-join
(map (lambda (guix-revision-id)
(string-append "(" guix-revision-id ")"))
guix-revision-ids)
", "))))
'("channel_instances"
"guix_revision_channel_news_entries"
"guix_revision_lint_checkers"
"guix_revision_lint_warnings"
"guix_revision_package_derivations"
"guix_revision_system_test_derivations"
"guix_revision_package_derivation_distribution_counts"))
(exec-query
conn
(string-append
"
DELETE FROM guix_revisions
WHERE id IN ("
(string-join guix-revision-ids ", ")
")
AND id NOT IN (
SELECT guix_revisions.id
FROM guix_revisions
INNER JOIN git_branches ON
git_branches.git_repository_id = guix_revisions.git_repository_id
INNER JOIN git_commits ON
git_commits.git_branch_id = git_branches.id AND
git_commits.commit = guix_revisions.commit
)"))
(delete-unreferenced-package-derivations)
(delete-unreferenced-lint-warnings)
(delete-unreferenced-lint-checkers))))
(define (delete-revisions-from-branch conn git-repository-id branch-name commits)
(define (delete-jobs conn)
(for-each
(lambda (table)
(exec-query
conn
(string-append
"
DELETE FROM " table "
WHERE job_id IN (
SELECT id
FROM load_new_guix_revision_jobs
WHERE git_repository_id = " (number->string git-repository-id) " AND
commit IN ("
(string-join
(map (lambda (commit)
(string-append "'" commit "'"))
commits)
", ")
")
)")))
'("load_new_guix_revision_job_events"
"load_new_guix_revision_job_logs"))
(exec-query
conn
(string-append
"
DELETE FROM load_new_guix_revision_jobs
WHERE git_repository_id = " (number->string git-repository-id) " AND
commit IN ("
(string-join
(map (lambda (commit)
(string-append "'" commit "'"))
commits)
", ")
")")))
(define (delete-from-git-commits conn)
(exec-query
conn
(simple-format
#f
"
DELETE FROM git_commits
WHERE id IN (
SELECT git_commits.id
FROM git_commits
INNER JOIN git_branches
ON git_branches.id = git_commits.git_branch_id
WHERE git_branches.git_repository_id = ~A
AND git_branches.name = '~A'
AND git_commits.commit IN (~A)
)"
git-repository-id
branch-name
(string-join
(map (lambda (commit)
(string-append "'" commit "'"))
commits)
", "))))
(catch 'psql-query-error
(lambda ()
(with-postgresql-transaction
conn
(lambda (conn)
(obtain-advisory-transaction-lock
conn
'delete-revisions-from-branch)
(exec-query conn "SET LOCAL lock_timeout = '5s';")
(delete-from-git-commits conn)
(delete-jobs conn)
(let ((git-branch-id
(git-branch-for-repository-and-name conn
git-repository-id
branch-name)))
(exec-query
conn
(string-append
"
DROP TABLE IF EXISTS package_derivations_by_guix_revision_range_git_branch_"
(number->string git-branch-id) ";")))
(delete-guix-revisions conn git-repository-id commits))))
(lambda (key . args)
(simple-format
(current-error-port)
"error when attempting to delete revisions from branch: ~A ~A\n"
key args)
(apply throw key args))))
(define (delete-data-for-branch conn git-repository-id branch-name)
(define commits
(map car
(exec-query conn
"
SELECT git_commits.commit
FROM git_branches
INNER JOIN git_commits
ON git_branches.id = git_commits.git_branch_id
WHERE git_repository_id = $1
AND git_branches.name = $2"
(list (number->string git-repository-id)
branch-name))))
(delete-revisions-from-branch conn
git-repository-id
branch-name
commits)
(exec-query
conn
"
DELETE FROM git_branches WHERE name = $1 AND git_repository_id = $2"
(list branch-name
(number->string git-repository-id))))
(define (delete-revisions-from-branch-except-most-recent-n conn
git-repository-id
branch-name
n)
(define commits
(map car
(exec-query conn
"
SELECT commit
FROM git_commits
INNER JOIN git_branches
ON git_branches.id = git_commits.git_branch_id
WHERE git_repository_id = $1 AND name = $2
ORDER BY datetime DESC
OFFSET $3"
(list (number->string git-repository-id)
branch-name
(number->string n)))))
(unless (null? commits)
(simple-format #t "deleting ~A commits from ~A\n" (length commits) branch-name)
(delete-revisions-from-branch conn
git-repository-id
branch-name
commits)
(simple-format #t "repopulating package_derivations_by_guix_revision_range\n")
(insert-guix-revision-package-derivation-entries conn
(number->string
git-repository-id)
branch-name)))
(define (delete-revisions-for-all-branches-except-most-recent-n n)
(with-postgresql-connection
"data-deletion"
(lambda (conn)
(for-each
(match-lambda
((git-repository-id branch-name)
(delete-revisions-from-branch-except-most-recent-n
conn
(string->number git-repository-id)
branch-name
n)))
(exec-query
conn
"
SELECT DISTINCT git_repository_id, name
FROM git_branches")))))
(define (delete-data-for-all-branches-but-master)
(with-postgresql-connection
"data-deletion"
(lambda (conn)
(for-each
(lambda (branch-name)
(delete-data-for-branch conn 1 branch-name))
(map
car
(exec-query
conn
"
SELECT DISTINCT name
FROM git_branches
WHERE git_repository_id = 1 AND name != 'master'"))))))
(define (delete-data-for-all-deleted-branches)
(with-postgresql-connection
"data-deletion"
(lambda (conn)
(for-each
(match-lambda
((name git-repository-id)
(simple-format #t "deleting data for ~A (~A)\n"
name git-repository-id)
(delete-data-for-branch conn
(string->number git-repository-id)
name)))
(exec-query
conn
"
SELECT name, git_repository_id
FROM (
SELECT DISTINCT ON (name, git_repository_id)
name, git_repository_id, commit
FROM git_branches
INNER JOIN git_commits
ON git_commits.git_branch_id = git_branches.id
ORDER BY git_repository_id, name, datetime DESC
) AS git_branches_latest_revision
WHERE commit = ''")))))
(define* (delete-unreferenced-derivations #:key
(batch-size 100000))
(define (delete-builds-for-derivation-output-details-set
conn
derivation-output-details-set-id)
(let ((build-ids
(map car
(exec-query
conn
"
SELECT id
FROM builds
WHERE derivation_output_details_set_id = $1"
(list derivation-output-details-set-id)))))
(unless (null? build-ids)
(exec-query
conn
(string-append
"
DELETE FROM build_status WHERE build_id IN ("
(string-join build-ids ",")
")"))
(exec-query
conn
(string-append
"
DELETE FROM latest_build_status WHERE build_id IN ("
(string-join build-ids ",")
")"))
(exec-query
conn
(string-append
"
DELETE FROM builds WHERE id IN ("
(string-join build-ids ",")
")")))))
(define (delete-blocked-builds-for-derivation-output-details-set
conn
derivation-output-details-set-id)
;; Do this for each build server individually, as that helps PostgreSQL
;; efficiently check the partitions
(let ((build-server-ids
(map
car
(exec-query
conn
"SELECT id FROM build_servers"))))
(for-each
(lambda (build-server-id)
(exec-query
conn
"
DELETE FROM blocked_builds
WHERE build_server_id = $1
AND (
blocked_derivation_output_details_set_id = $2 OR blocking_derivation_output_details_set_id = $3
)"
(list build-server-id
derivation-output-details-set-id
derivation-output-details-set-id)))
build-server-ids)))
(define (delete-unreferenced-derivations-source-files conn)
(define (delete-batch)
(exec-query
conn
"
DELETE FROM derivation_source_files
WHERE id IN (
SELECT id
FROM derivation_source_files
WHERE NOT EXISTS (
SELECT 1
FROM derivation_sources
WHERE derivation_source_file_id = derivation_source_files.id
)
LIMIT 100
)
RETURNING id"))
(while (not (null? (delete-batch)))
#t))
(define (maybe-delete-derivation conn id)
(match (map
car
(exec-query
conn
"
DELETE FROM derivation_outputs WHERE derivation_id = $1
AND NOT EXISTS (
SELECT 1
FROM derivation_inputs
WHERE derivation_output_id IN (
SELECT derivation_outputs.id
FROM derivation_outputs
WHERE derivation_id = $1
)
)
RETURNING derivation_outputs.derivation_output_details_id"
(list id)))
(() 0)
((derivation-output-details-ids ...)
(for-each
(lambda (derivation-output-details-id)
(unless (string->number derivation-output-details-id)
(error
(simple-format #f "derivation-output-details-id: ~A is not a number"
derivation-output-details-id)))
(match (exec-query
conn
"
SELECT COUNT(*) FROM derivation_outputs
WHERE derivation_output_details_id = $1"
(list derivation-output-details-id))
(((count))
(when (eq? (string->number count)
0)
(exec-query
conn
"
DELETE FROM derivation_output_details
WHERE id = $1"
(list derivation-output-details-id))))))
derivation-output-details-ids)
(exec-query
conn
"
DELETE FROM derivation_sources WHERE derivation_id = $1"
(list id))
(match (exec-query
conn
"
SELECT derivation_output_details_set_id
FROM derivations_by_output_details_set
WHERE derivation_id = $1"
(list id))
(((derivation-output-details-set-id))
(match (exec-query
conn
"
SELECT COUNT(*) FROM derivations_by_output_details_set
WHERE derivation_output_details_set_id = $1"
(list derivation-output-details-set-id))
(((count))
(exec-query
conn
"
DELETE FROM derivations_by_output_details_set
WHERE derivation_id = $1"
(list id))
(when (<= (string->number count)
1)
(delete-builds-for-derivation-output-details-set
conn
derivation-output-details-set-id)
(delete-blocked-builds-for-derivation-output-details-set
conn
derivation-output-details-set-id)
(exec-query
conn
"
DELETE FROM derivation_output_details_sets
WHERE id = $1"
(list derivation-output-details-set-id)))))))
(exec-query
conn
"
DELETE FROM derivation_inputs WHERE derivation_id = $1"
(list id))
(exec-query
conn
"
DELETE FROM derivations WHERE id = $1"
(list id))
1)))
(define deleted-count 0)
(define channel (make-channel))
(define (delete-batch conn)
(let* ((derivations
(with-time-logging "fetching batch of derivations"
(map car
(exec-query
conn
"
SELECT DISTINCT derivation_id
FROM derivation_outputs
WHERE NOT EXISTS (
-- This isn't a perfect check, as this will select some derivations that are
-- used, but maybe-delete-derivation includes the proper check
SELECT 1
FROM derivation_inputs
WHERE derivation_output_id = derivation_outputs.id
) AND NOT EXISTS (
SELECT 1
FROM package_derivations
WHERE package_derivations.derivation_id = derivation_outputs.derivation_id
) AND NOT EXISTS (
SELECT 1 FROM channel_instances
WHERE derivation_id = derivation_outputs.derivation_id
) AND NOT EXISTS (
SELECT 1 FROM guix_revision_system_test_derivations
WHERE derivation_id = derivation_outputs.derivation_id
) LIMIT $1"
(list (number->string batch-size))))))
(derivations-count (length derivations)))
(with-time-logging
(simple-format #f "Looking at ~A derivations" derivations-count)
(set! deleted-count 0)
(for-each
(lambda (derivation-id)
(put-message channel derivation-id))
derivations))
(simple-format (current-error-port)
"Deleted ~A derivations\n"
deleted-count)
deleted-count))
(run-fibers
(lambda ()
;; First spawn some fibers to delete the derivations
(for-each
(lambda _
(spawn-fiber
(lambda ()
(with-postgresql-connection
"data-deletion"
(lambda (conn)
(let loop ((derivation-id (get-message channel)))
(unless (string->number derivation-id)
(error
(simple-format #f "derivation-id: ~A is not a number"
derivation-id)))
(let ((val
(catch 'psql-query-error
(lambda ()
(with-postgresql-transaction
conn
(lambda (conn)
(exec-query
conn
"
SET CONSTRAINTS derivations_by_output_details_set_derivation_id_fkey DEFERRED")
(exec-query conn "SET LOCAL lock_timeout = '5s';")
(maybe-delete-derivation conn
derivation-id))))
(lambda (key . args)
(simple-format
(current-error-port)
"error when attempting to delete derivation: ~A ~A\n"
key args)
0))))
;; This is safe as all fibers are in the same
;; thread and cooperative.
(set! deleted-count
(+ val deleted-count)))
(loop (get-message channel))))))))
(iota 12))
(with-postgresql-connection
"data-deletion"
(lambda (conn)
(obtain-advisory-transaction-lock
conn
'delete-unreferenced-derivations)
(let loop ((total-deleted 0))
(let ((batch-deleted-count (delete-batch conn)))
(if (eq? 0 batch-deleted-count)
(begin
(with-time-logging
"Deleting unused derivation_source_files entries"
(delete-unreferenced-derivations-source-files conn))
(simple-format
(current-output-port)
"Finished deleting derivations, deleted ~A in total\n"
total-deleted))
(loop (+ total-deleted batch-deleted-count))))))))
#:hz 0
#:parallelism 1))
(define (delete-nars-for-unknown-store-paths)
(define (get-nar-ids-batch conn)
(map
car
(exec-query
conn
"
SELECT id
FROM nars
WHERE NOT EXISTS
(
SELECT 1 FROM derivation_output_details
WHERE derivation_output_details.path = nars.store_path
)
LIMIT 50")))
(define (delete-narinfo-signature-data conn nar-ids)
(exec-query
conn
(string-append
"
DELETE FROM narinfo_signature_data
WHERE id IN (
SELECT narinfo_signature_data_id
FROM narinfo_signatures
WHERE nar_id IN ("
(string-join nar-ids ",")
")
)")))
(define (delete-nars conn nar-ids)
(exec-query
conn
(string-append
"
DELETE FROM nars WHERE id IN ("
(string-join nar-ids ",")
"
)")))
(with-postgresql-connection
"data-deletion"
(lambda (conn)
(with-advisory-session-lock
conn
'delete-nars-for-unknown-store-paths
(lambda ()
(newline)
(let loop ((nar-ids (get-nar-ids-batch conn)))
(unless (null? nar-ids)
(delete-narinfo-signature-data conn nar-ids)
(delete-nars conn nar-ids)
(display ".")
(force-output)
(loop (get-nar-ids-batch conn)))))))))