mirror of
git://git.savannah.gnu.org/guix/data-service.git
synced 2023-12-14 03:23:03 +01:00
39487cd7e6
Drop the batch size to get rid of warnings about memory usage and improve the logging by adding duration information.
570 lines
17 KiB
Scheme
570 lines
17 KiB
Scheme
;;; Guix Data Service -- Information about Guix over time
|
|
;;; Copyright © 2020 Christopher Baines <mail@cbaines.net>
|
|
;;;
|
|
;;; This program is free software: you can redistribute it and/or
|
|
;;; modify it under the terms of the GNU Affero General Public License
|
|
;;; as published by the Free Software Foundation, either version 3 of
|
|
;;; the License, or (at your option) any later version.
|
|
;;;
|
|
;;; This program is distributed in the hope that it will be useful,
|
|
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
;;; Affero General Public License for more details.
|
|
;;;
|
|
;;; You should have received a copy of the GNU Affero General Public
|
|
;;; License along with this program. If not, see
|
|
;;; <http://www.gnu.org/licenses/>.
|
|
|
|
(define-module (guix-data-service data-deletion)
|
|
#:use-module (srfi srfi-1)
|
|
#:use-module (srfi srfi-11)
|
|
#:use-module (ice-9 match)
|
|
#:use-module (ice-9 threads)
|
|
#:use-module (squee)
|
|
#:use-module (fibers)
|
|
#:use-module (guix-data-service utils)
|
|
#:use-module (guix-data-service database)
|
|
#:use-module (guix-data-service model package-derivation-by-guix-revision-range)
|
|
#:export (delete-guix-revisions
|
|
delete-data-for-branch
|
|
delete-revisions-from-branch-except-most-recent-n
|
|
delete-revisions-for-all-branches-except-most-recent-n
|
|
delete-data-for-all-deleted-branches
|
|
delete-unreferenced-derivations))
|
|
|
|
(define (delete-guix-revisions conn git-repository-id commits)
|
|
(define (delete-unreferenced-package-derivations)
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM package_derivations
|
|
WHERE NOT EXISTS (
|
|
SELECT 1
|
|
FROM guix_revision_package_derivations
|
|
WHERE guix_revision_package_derivations.package_derivation_id =
|
|
package_derivations.id
|
|
)"))
|
|
|
|
(define (delete-unreferenced-lint-warnings)
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM lint_warnings
|
|
WHERE NOT EXISTS (
|
|
SELECT 1
|
|
FROM guix_revision_lint_warnings
|
|
WHERE guix_revision_lint_warnings.lint_warning_id =
|
|
lint_warnings.id
|
|
)"))
|
|
|
|
(define (delete-unreferenced-lint-checkers)
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM lint_checkers
|
|
WHERE NOT EXISTS (
|
|
SELECT 1
|
|
FROM guix_revision_lint_checkers
|
|
WHERE guix_revision_lint_checkers.lint_checker_id =
|
|
lint_checkers.id
|
|
)"))
|
|
|
|
(let ((guix-revision-ids
|
|
(map
|
|
car
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
SELECT guix_revisions.id
|
|
FROM (VALUES "
|
|
(string-join
|
|
(map (lambda (commit)
|
|
(string-append "('" commit "')"))
|
|
commits)
|
|
", ")
|
|
") AS commits
|
|
INNER JOIN guix_revisions
|
|
ON guix_revisions.commit = commits.column1
|
|
WHERE guix_revisions.git_repository_id = "
|
|
(number->string git-repository-id) " AND
|
|
commits.column1 NOT IN (
|
|
SELECT commit
|
|
FROM git_commits
|
|
)")))))
|
|
|
|
(unless (null? guix-revision-ids)
|
|
(for-each
|
|
(lambda (table)
|
|
(exec-query
|
|
conn
|
|
(simple-format
|
|
#f
|
|
"
|
|
DELETE FROM ~A WHERE ~A IN (VALUES ~A)"
|
|
table
|
|
(if (string=? table
|
|
"guix_revision_package_derivations")
|
|
"revision_id"
|
|
"guix_revision_id")
|
|
(string-join
|
|
(map (lambda (guix-revision-id)
|
|
(string-append "(" guix-revision-id ")"))
|
|
guix-revision-ids)
|
|
", "))))
|
|
'("channel_instances"
|
|
"guix_revision_channel_news_entries"
|
|
"guix_revision_lint_checkers"
|
|
"guix_revision_lint_warnings"
|
|
"guix_revision_package_derivations"
|
|
"guix_revision_system_test_derivations"))
|
|
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
DELETE FROM guix_revisions
|
|
WHERE id IN ("
|
|
(string-join guix-revision-ids ", ")
|
|
")
|
|
AND id NOT IN (
|
|
SELECT guix_revisions.id
|
|
FROM guix_revisions
|
|
INNER JOIN git_branches ON
|
|
git_branches.git_repository_id = guix_revisions.git_repository_id
|
|
INNER JOIN git_commits ON
|
|
git_commits.git_branch_id = git_branches.id AND
|
|
git_commits.commit = guix_revisions.commit
|
|
)"))
|
|
|
|
(delete-unreferenced-package-derivations)
|
|
(delete-unreferenced-lint-warnings)
|
|
(delete-unreferenced-lint-checkers))))
|
|
|
|
(define (delete-revisions-from-branch conn git-repository-id branch-name commits)
|
|
(define (delete-jobs conn)
|
|
(for-each
|
|
(lambda (table)
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
DELETE FROM " table "
|
|
WHERE job_id IN (
|
|
SELECT id
|
|
FROM load_new_guix_revision_jobs
|
|
WHERE git_repository_id = " (number->string git-repository-id) " AND
|
|
commit IN ("
|
|
(string-join
|
|
(map (lambda (commit)
|
|
(string-append "'" commit "'"))
|
|
commits)
|
|
", ")
|
|
")
|
|
)")))
|
|
'("load_new_guix_revision_job_events"
|
|
"load_new_guix_revision_job_logs"))
|
|
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
DELETE FROM load_new_guix_revision_jobs
|
|
WHERE git_repository_id = " (number->string git-repository-id) " AND
|
|
commit IN ("
|
|
(string-join
|
|
(map (lambda (commit)
|
|
(string-append "'" commit "'"))
|
|
commits)
|
|
", ")
|
|
")")))
|
|
|
|
(define (delete-from-git-commits conn)
|
|
(exec-query
|
|
conn
|
|
(simple-format
|
|
#f
|
|
"
|
|
DELETE FROM git_commits
|
|
WHERE id IN (
|
|
SELECT git_commits.id
|
|
FROM git_commits
|
|
INNER JOIN git_branches
|
|
ON git_branches.id = git_commits.git_branch_id
|
|
WHERE git_branches.git_repository_id = ~A
|
|
AND git_branches.name = '~A'
|
|
AND git_commits.commit IN (~A)
|
|
)"
|
|
git-repository-id
|
|
branch-name
|
|
(string-join
|
|
(map (lambda (commit)
|
|
(string-append "'" commit "'"))
|
|
commits)
|
|
", "))))
|
|
|
|
(with-postgresql-transaction
|
|
conn
|
|
(lambda (conn)
|
|
(delete-from-git-commits conn)
|
|
(delete-jobs conn)
|
|
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM package_derivations_by_guix_revision_range
|
|
WHERE git_branch_id IN (
|
|
SELECT id
|
|
FROM git_branches
|
|
WHERE git_repository_id = $1 AND
|
|
name = $2
|
|
)"
|
|
(list (number->string git-repository-id)
|
|
branch-name))
|
|
|
|
(delete-guix-revisions conn git-repository-id commits))))
|
|
|
|
(define (delete-data-for-branch conn git-repository-id branch-name)
|
|
(define commits
|
|
(map car
|
|
(exec-query conn
|
|
"
|
|
SELECT git_commits.commit
|
|
FROM git_branches
|
|
INNER JOIN git_commits
|
|
ON git_branches.id = git_commits.git_branch_id
|
|
WHERE git_repository_id = $1
|
|
AND git_branches.name = $2"
|
|
(list (number->string git-repository-id)
|
|
branch-name))))
|
|
|
|
(delete-revisions-from-branch conn
|
|
git-repository-id
|
|
branch-name
|
|
commits))
|
|
|
|
(define (delete-revisions-from-branch-except-most-recent-n conn
|
|
git-repository-id
|
|
branch-name
|
|
n)
|
|
(define commits
|
|
(map car
|
|
(exec-query conn
|
|
"
|
|
SELECT commit
|
|
FROM git_commits
|
|
INNER JOIN git_branches
|
|
ON git_branches.id = git_commits.git_branch_id
|
|
WHERE git_repository_id = $1 AND name = $2
|
|
ORDER BY datetime DESC
|
|
OFFSET $3"
|
|
(list (number->string git-repository-id)
|
|
branch-name
|
|
(number->string n)))))
|
|
|
|
(unless (null? commits)
|
|
(simple-format #t "deleting ~A commits from ~A\n" (length commits) branch-name)
|
|
(delete-revisions-from-branch conn
|
|
git-repository-id
|
|
branch-name
|
|
commits)
|
|
|
|
(simple-format #t "repopulating package_derivations_by_guix_revision_range\n")
|
|
(insert-guix-revision-package-derivation-entries conn
|
|
(number->string
|
|
git-repository-id)
|
|
branch-name)))
|
|
|
|
(define (delete-revisions-for-all-branches-except-most-recent-n n)
|
|
(with-postgresql-connection
|
|
"data-deletion"
|
|
(lambda (conn)
|
|
(for-each
|
|
(match-lambda
|
|
((git-repository-id branch-name)
|
|
(delete-revisions-from-branch-except-most-recent-n
|
|
conn
|
|
(string->number git-repository-id)
|
|
branch-name
|
|
n)))
|
|
(exec-query
|
|
conn
|
|
"
|
|
SELECT DISTINCT git_repository_id, name
|
|
FROM git_branches")))))
|
|
|
|
(define (delete-data-for-all-branches-but-master)
|
|
(with-postgresql-connection
|
|
"data-deletion"
|
|
(lambda (conn)
|
|
(for-each
|
|
(lambda (branch-name)
|
|
(delete-data-for-branch conn 1 branch-name))
|
|
(map
|
|
car
|
|
(exec-query
|
|
conn
|
|
"
|
|
SELECT DISTINCT name
|
|
FROM git_branches
|
|
WHERE git_repository_id = 1 AND name != 'master'"))))))
|
|
|
|
(define (delete-data-for-all-deleted-branches)
|
|
(with-postgresql-connection
|
|
"data-deletion"
|
|
(lambda (conn)
|
|
(for-each
|
|
(match-lambda
|
|
((name git-repository-id)
|
|
(simple-format #t "deleting data for ~A (~A)\n"
|
|
name git-repository-id)
|
|
(delete-data-for-branch conn
|
|
(string->number git-repository-id)
|
|
name)))
|
|
(exec-query
|
|
conn
|
|
"
|
|
SELECT name, git_repository_id
|
|
FROM (
|
|
SELECT DISTINCT ON (name, git_repository_id)
|
|
name, git_repository_id, commit
|
|
FROM git_branches
|
|
INNER JOIN git_commits
|
|
ON git_commits.git_branch_id = git_branches.id
|
|
ORDER BY git_repository_id, name, datetime DESC
|
|
) AS git_branches_latest_revision
|
|
WHERE commit = ''")))))
|
|
|
|
(define* (delete-unreferenced-derivations #:key
|
|
(batch-size 100000))
|
|
(define (delete-builds-for-derivation-output-details-set
|
|
conn
|
|
derivation-output-details-set-id)
|
|
(let ((build-ids
|
|
(map car
|
|
(exec-query
|
|
conn
|
|
"
|
|
SELECT id
|
|
FROM builds
|
|
WHERE derivation_output_details_set_id = $1"
|
|
(list derivation-output-details-set-id)))))
|
|
|
|
(unless (null? build-ids)
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
DELETE FROM build_status WHERE build_id IN ("
|
|
(string-join build-ids ",")
|
|
")"))
|
|
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
DELETE FROM latest_build_status WHERE build_id IN ("
|
|
(string-join build-ids ",")
|
|
")"))
|
|
|
|
(exec-query
|
|
conn
|
|
(string-append
|
|
"
|
|
DELETE FROM builds WHERE id IN ("
|
|
(string-join build-ids ",")
|
|
")")))))
|
|
|
|
(define (delete-unreferenced-derivations-source-files conn)
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivation_source_files
|
|
WHERE NOT EXISTS (
|
|
SELECT 1
|
|
FROM derivation_sources
|
|
WHERE derivation_source_file_id = derivation_source_files.id
|
|
)"))
|
|
|
|
(define (maybe-delete-derivation conn id)
|
|
(match (map
|
|
car
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivation_outputs WHERE derivation_id = $1
|
|
AND NOT EXISTS (
|
|
SELECT 1
|
|
FROM derivation_inputs
|
|
WHERE derivation_output_id IN (
|
|
SELECT derivation_outputs.id
|
|
FROM derivation_outputs
|
|
WHERE derivation_id = $1
|
|
)
|
|
)
|
|
RETURNING derivation_outputs.derivation_output_details_id"
|
|
(list id)))
|
|
(() 0)
|
|
((derivation-output-details-ids ...)
|
|
|
|
(for-each
|
|
(lambda (derivation-output-details-id)
|
|
(unless (string->number derivation-output-details-id)
|
|
(error
|
|
(simple-format #f "derivation-output-details-id: ~A is not a number"
|
|
derivation-output-details-id)))
|
|
|
|
(match (exec-query
|
|
conn
|
|
"
|
|
SELECT COUNT(*) FROM derivation_outputs
|
|
WHERE derivation_output_details_id = $1"
|
|
(list derivation-output-details-id))
|
|
(((count))
|
|
(when (eq? (string->number count)
|
|
0)
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivation_output_details
|
|
WHERE id = $1"
|
|
(list derivation-output-details-id))))))
|
|
derivation-output-details-ids)
|
|
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivation_sources WHERE derivation_id = $1"
|
|
(list id))
|
|
|
|
(match (exec-query
|
|
conn
|
|
"
|
|
SELECT derivation_output_details_set_id
|
|
FROM derivations_by_output_details_set
|
|
WHERE derivation_id = $1"
|
|
(list id))
|
|
(((derivation-output-details-set-id))
|
|
(match (exec-query
|
|
conn
|
|
"
|
|
SELECT COUNT(*) FROM derivations_by_output_details_set
|
|
WHERE derivation_output_details_set_id = $1"
|
|
(list derivation-output-details-set-id))
|
|
(((count))
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivations_by_output_details_set
|
|
WHERE derivation_id = $1"
|
|
(list id))
|
|
|
|
(when (<= (string->number count)
|
|
1)
|
|
(delete-builds-for-derivation-output-details-set
|
|
conn
|
|
derivation-output-details-set-id)
|
|
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivation_output_details_sets
|
|
WHERE id = $1"
|
|
(list derivation-output-details-set-id)))))))
|
|
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivation_inputs WHERE derivation_id = $1"
|
|
(list id))
|
|
|
|
(exec-query
|
|
conn
|
|
"
|
|
DELETE FROM derivations WHERE id = $1"
|
|
(list id))
|
|
|
|
1)))
|
|
|
|
(with-postgresql-connection
|
|
"data-deletion"
|
|
(lambda (conn)
|
|
(define (delete-batch conn)
|
|
(let* ((derivations
|
|
(with-time-logging "fetching batch of derivations"
|
|
(map car
|
|
(exec-query
|
|
conn
|
|
"
|
|
SELECT DISTINCT derivation_id
|
|
FROM derivation_outputs
|
|
WHERE NOT EXISTS (
|
|
-- This isn't a perfect check, as this will select some derivations that are
|
|
-- used, but maybe-delete-derivation includes the proper check
|
|
SELECT 1
|
|
FROM derivation_inputs
|
|
WHERE derivation_output_id = derivation_outputs.id
|
|
) AND NOT EXISTS (
|
|
SELECT 1
|
|
FROM package_derivations
|
|
WHERE package_derivations.derivation_id = derivation_outputs.derivation_id
|
|
) AND NOT EXISTS (
|
|
SELECT 1 FROM channel_instances
|
|
WHERE derivation_id = derivation_outputs.derivation_id
|
|
) AND NOT EXISTS (
|
|
SELECT 1 FROM guix_revision_system_test_derivations
|
|
WHERE derivation_id = derivation_outputs.derivation_id
|
|
) LIMIT $1"
|
|
(list (number->string batch-size))))))
|
|
(derivations-count (length derivations)))
|
|
(let ((deleted-count
|
|
(with-time-logging
|
|
(simple-format #f
|
|
"Looking at ~A derivations"
|
|
derivations-count)
|
|
(fold
|
|
(lambda (count result)
|
|
(+ result count))
|
|
0
|
|
(map
|
|
(lambda (derivation-id)
|
|
(unless (string->number derivation-id)
|
|
(error
|
|
(simple-format #f "derivation-id: ~A is not a number"
|
|
derivation-id)))
|
|
|
|
(with-thread-postgresql-connection
|
|
(lambda (conn)
|
|
(with-postgresql-transaction
|
|
conn
|
|
(lambda (conn)
|
|
(exec-query
|
|
conn
|
|
"
|
|
SET CONSTRAINTS derivations_by_output_details_set_derivation_id_fkey DEFERRED")
|
|
|
|
(maybe-delete-derivation conn
|
|
derivation-id))))))
|
|
derivations)))))
|
|
(simple-format (current-error-port)
|
|
"Deleted ~A derivations\n"
|
|
deleted-count)
|
|
deleted-count)))
|
|
|
|
(with-postgresql-connection-per-thread
|
|
"data-deletion-thread"
|
|
(lambda ()
|
|
(run-fibers
|
|
(lambda ()
|
|
(let loop ((total-deleted 0))
|
|
(let ((batch-deleted-count (delete-batch conn)))
|
|
(if (eq? 0 batch-deleted-count)
|
|
(begin
|
|
(with-time-logging
|
|
"Deleting unused derivation_source_files entries"
|
|
(delete-unreferenced-derivations-source-files conn))
|
|
(simple-format
|
|
(current-output-port)
|
|
"Finished deleting derivations, deleted ~A in total\n"
|
|
total-deleted))
|
|
(loop (+ total-deleted batch-deleted-count))))))))))))
|