Support polling git repositories for new branches/revisions

This is mostly a workaround for the occasional problems with the guix-commits
mailing list, as it can break and then the data service doesn't learn about
new revisions until the problem is fixed.

I think it's still a generally good feature though, and allows deploying the
data service without it consuming emails to learn about new revisions, and is
a step towards integrating some kind of way of notifying the data service to
poll.
This commit is contained in:
Christopher Baines 2023-10-09 21:29:58 +01:00
parent 9bb8f84741
commit 10bad53ad5
14 changed files with 328 additions and 53 deletions

View File

@ -69,6 +69,7 @@ check-with-tmp-database:
SOURCES = \
guix-data-service/branch-updated-emails.scm \
guix-data-service/poll-git-repository.scm \
guix-data-service/builds.scm \
guix-data-service/comparison.scm \
guix-data-service/config.scm \

View File

@ -20,6 +20,10 @@
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-19)
#:use-module (email email)
#:use-module (squee)
#:use-module (guix store)
#:use-module (guix channels)
#:use-module (guix-data-service database)
#:use-module (guix-data-service model git-repository)
#:use-module (guix-data-service model git-branch)
#:use-module (guix-data-service model git-commit)
@ -60,25 +64,72 @@
(when (and (not excluded-branch?)
(or (null? included-branches)
included-branch?))
(insert-git-commit-entry conn
(or (git-branch-for-repository-and-name
conn
git-repository-id
branch-name)
(insert-git-branch-entry
conn
git-repository-id
branch-name))
(if (string=? commit-all-zeros
x-git-newrev)
(if (string=? commit-all-zeros x-git-newrev)
(insert-git-commit-entry conn
(or (git-branch-for-repository-and-name
conn
git-repository-id
branch-name)
(insert-git-branch-entry
conn
git-repository-id
branch-name))
""
x-git-newrev)
date)
date)
(unless (string=? commit-all-zeros x-git-newrev)
(enqueue-load-new-guix-revision-job
conn
git-repository-id
x-git-newrev
(string-append x-git-repo " "
x-git-refname " updated")))))))))))
;; Fetch the latest channel instance to check if this
;; email matches up with the current state of the Git
;; repository, and ignore it if it doesn't.
(let* ((git-repository-details
(select-git-repository conn git-repository-id))
(channel-for-commit
(channel (name 'guix)
(url (second git-repository-details))
(commit x-git-repo)))
(channel-instance
;; Obtain a session level lock here, to avoid conflicts with
;; other jobs over the Git repository.
(with-advisory-session-lock/log-time
conn
'latest-channel-instances
(lambda ()
(with-store store
(first
(latest-channel-instances store
(list channel-for-commit)
#:authenticate?
(fourth git-repository-details))))))))
(if (string=? (channel-instance-commit channel-instance)
x-git-newrev)
(with-postgresql-transaction
conn
(lambda (conn)
(exec-query conn "LOCK TABLE git_commits IN EXCLUSIVE MODE")
(if (git-commit-exists? conn x-git-newrev)
(simple-format #t "commit already exists for revision ~A (date: ~A)\n"
x-git-newrev
date)
(begin
(insert-git-commit-entry conn
(or (git-branch-for-repository-and-name
conn
git-repository-id
branch-name)
(insert-git-branch-entry
conn
git-repository-id
branch-name))
x-git-newrev
date)
(enqueue-load-new-guix-revision-job
conn
git-repository-id
x-git-newrev
(string-append x-git-repo " "
x-git-refname " updated"))))))
(simple-format #t "email newrev ~A doesn't match latest channel instance commit ~A\n"
x-git-newrev
(channel-instance-commit channel-instance)))))))))))))

View File

@ -39,6 +39,7 @@
check-test-database!
with-advisory-session-lock
with-advisory-session-lock/log-time
obtain-advisory-transaction-lock
exec-query-with-null-handling))
@ -298,6 +299,22 @@
"SELECT pg_advisory_unlock($1)"
(list lock-number))))))
(define (with-advisory-session-lock/log-time conn lock f)
(simple-format #t "debug: Acquiring advisory session lock: ~A\n" lock)
(let ((start-time (current-time)))
(with-advisory-session-lock
conn
lock
(lambda ()
(let ((time-taken (- (current-time) start-time)))
(simple-format #t "debug: Finished aquiring lock ~A, took ~A seconds\n"
lock time-taken))
(let ((result (f)))
(let ((time-spent (- (current-time) start-time)))
(simple-format #t "debug: Releasing lock ~A, spent ~A seconds\n"
lock time-spent))
result)))))
(define (obtain-advisory-transaction-lock conn lock)
(let ((lock-number (number->string (symbol-hash lock))))
(exec-query conn

View File

@ -344,22 +344,6 @@ WHERE job_id = $1")
(simple-format #t "debug: Finished ~A, took ~A seconds\n"
action time-taken)))))
(define (with-advisory-session-lock/log-time conn lock f)
(simple-format #t "debug: Acquiring advisory session lock: ~A\n" lock)
(let ((start-time (current-time)))
(with-advisory-session-lock
conn
lock
(lambda ()
(let ((time-taken (- (current-time) start-time)))
(simple-format #t "debug: Finished aquiring lock ~A, took ~A seconds\n"
lock time-taken))
(let ((result (f)))
(let ((time-spent (- (current-time) start-time)))
(simple-format #t "debug: Releasing lock ~A, spent ~A seconds\n"
lock time-spent))
result)))))
(define (inferior-guix-systems inf)
;; The order shouldn't matter here, but bugs in Guix can lead to different
;; results depending on the order, so sort the systems to try and provide

View File

@ -21,7 +21,8 @@
#:use-module (squee)
#:use-module (srfi srfi-19)
#:use-module (guix-data-service model utils)
#:export (insert-git-commit-entry))
#:export (insert-git-commit-entry
git-commit-exists?))
(define (insert-git-commit-entry conn
git-branch-id
@ -36,3 +37,11 @@ ON CONFLICT DO NOTHING"
(list commit
(number->string git-branch-id)
(date->string datetime "~s"))))
(define (git-commit-exists? conn commit)
(match (exec-query
conn
"SELECT 1 FROM git_commits WHERE commit = $1"
(list commit))
(#f #f)
(_ #t)))

View File

@ -35,28 +35,36 @@
(define (all-git-repositories conn)
(map
(match-lambda
((id label url cgit-base-url)
((id label url cgit-base-url poll-interval)
(list (string->number id)
label
url
cgit-base-url)))
cgit-base-url
(and=> poll-interval string->number))))
(exec-query
conn
(string-append
"SELECT id, label, url, cgit_url_base FROM git_repositories ORDER BY id ASC"))))
"
SELECT id, label, url, cgit_url_base, poll_interval
FROM git_repositories ORDER BY id ASC")))
(define (select-git-repository conn id)
(match (exec-query
conn
"SELECT label, url, cgit_url_base, fetch_with_authentication FROM git_repositories WHERE id = $1"
(list id))
"
SELECT label, url, cgit_url_base, fetch_with_authentication, poll_interval
FROM git_repositories
WHERE id = $1"
(list (if (number? id)
(number->string id)
id)))
(()
#f)
(((label url cgit_url_base fetch_with_authentication))
(((label url cgit_url_base fetch_with_authentication poll-interval))
(list label
url
cgit_url_base
(string=? fetch_with_authentication "t")))))
(string=? fetch_with_authentication "t")
(and=> poll-interval string->number)))))
(define (git-repository-query-substitutes? conn id)
(match (exec-query

View File

@ -0,0 +1,168 @@
;;; Guix Data Service -- Information about Guix over time
;;; Copyright © 2023 Christopher Baines <mail@cbaines.net>
;;;
;;; This program is free software: you can redistribute it and/or
;;; modify it under the terms of the GNU Affero General Public License
;;; as published by the Free Software Foundation, either version 3 of
;;; the License, or (at your option) any later version.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Affero General Public License for more details.
;;;
;;; You should have received a copy of the GNU Affero General Public
;;; License along with this program. If not, see
;;; <http://www.gnu.org/licenses/>.
(define-module (guix-data-service poll-git-repository)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-19)
#:use-module (srfi srfi-71)
#:use-module (ice-9 threads)
#:use-module (squee)
#:use-module (git oid)
#:use-module (git branch)
#:use-module (git reference)
#:use-module (guix git)
#:use-module (guix channels)
#:use-module (guix-data-service database)
#:use-module (guix-data-service model git-repository)
#:use-module (guix-data-service model git-branch)
#:use-module (guix-data-service model git-commit)
#:use-module (guix-data-service jobs load-new-guix-revision)
#:export (start-thread-to-poll-git-repository))
(define (start-thread-to-poll-git-repository git-repository-id)
(call-with-new-thread
(lambda ()
(with-postgresql-connection
(simple-format #f "poll-git-repository-~A"
git-repository-id)
(lambda (conn)
(let loop ()
(with-exception-handler
(lambda (exn)
(simple-format #t "exception when polling git repository (~A): ~A\n"
git-repository-id exn))
(lambda ()
(with-throw-handler #t
(lambda ()
(poll-git-repository conn git-repository-id))
(lambda _
(backtrace))))
#:unwind? #t)
(and=>
(fifth (select-git-repository conn git-repository-id))
(lambda (poll-interval)
(sleep poll-interval)
(loop)))))))))
(define (poll-git-repository conn git-repository-id)
(define git-repository-details
(select-git-repository conn git-repository-id))
;; Obtain a session level lock here, to avoid conflicts with other jobs over
;; the Git repository.
(with-advisory-session-lock/log-time
conn
'latest-channel-instances
(lambda ()
;; Maybe this helps avoid segfaults?
(monitor
(update-cached-checkout (second git-repository-details)))
(let* ((repository-directory
(url-cache-directory
(second git-repository-details)))
(included-branches
excluded-branches
(select-includes-and-excluded-branches-for-git-repository
conn
git-repository-id))
(repository-branches
(with-repository repository-directory repository
(map
(lambda (branch-reference)
(let* ((branch-name
(last
(string-split
(reference-shorthand branch-reference)
#\/))))
(cons
branch-name
;; TODO Not sure what the right way to do this is
(and=> (false-if-exception
(reference-target branch-reference))
oid->string))))
(branch-list repository BRANCH-REMOTE)))))
(with-postgresql-transaction
conn
(lambda (conn)
(exec-query conn "LOCK TABLE git_commits IN EXCLUSIVE MODE")
(let* ((repository-branch-details
(all-branches-with-most-recent-commit conn
git-repository-id))
(branch-names
(filter
(lambda (branch-name)
(let ((excluded-branch?
(member branch-name excluded-branches string=?))
(included-branch?
(member branch-name included-branches string=?)))
(and (not excluded-branch?)
(or (null? included-branches)
included-branch?))))
(delete-duplicates!
(append!
(map car repository-branches)
(map car repository-branch-details))))))
(for-each
(lambda (branch-name)
(define (git-branch-entry)
(or (git-branch-for-repository-and-name
conn
git-repository-id
branch-name)
(insert-git-branch-entry
conn
git-repository-id
branch-name)))
(let ((repository-commit
(assoc-ref repository-branches branch-name))
(database-commit
(and=> (assoc-ref repository-branch-details
branch-name)
first)))
(if repository-commit
(if (and database-commit
(string=? database-commit
repository-commit))
#f ;; Nothing to do
(begin
(insert-git-commit-entry conn
(git-branch-entry)
repository-commit
(current-date 0))
(unless #f
(enqueue-load-new-guix-revision-job
conn
git-repository-id
repository-commit
"poll"))))
(if database-commit
#f ;; Nothing to do
(insert-git-commit-entry conn
(git-branch-entry)
""
(current-date 0))))))
branch-names))))))))

View File

@ -58,7 +58,7 @@
`((repositories
. ,(list->vector
(map (match-lambda
((id label url cgit-base-url)
((id label url cgit-base-url _)
`((id . ,id)
(label . ,label)
(url . ,url))))
@ -70,7 +70,7 @@
(('GET "repository" id)
(match (with-resource-from-pool (connection-pool) conn
(select-git-repository conn id))
((label url cgit-url-base fetch-with-authentication?)
((label url cgit-url-base fetch-with-authentication? poll-interval)
(letpar& ((branches
(with-resource-from-pool (connection-pool) conn
(all-branches-with-most-recent-commit

View File

@ -314,7 +314,7 @@
"Jobs"))))
,@(map
(match-lambda
(((repository-id label url cgit-url-base) . branches-with-most-recent-commits)
(((repository-id label url cgit-url-base poll-interval) . branches-with-most-recent-commits)
`(div
(@ (class "row"))
(div

View File

@ -37,6 +37,8 @@
(guix-data-service config)
(guix-data-service database)
(guix-data-service substitutes)
(guix-data-service poll-git-repository)
(guix-data-service model git-repository)
(guix-data-service model guix-revision-package-derivation)
(guix-data-service web server)
(guix-data-service web controller)
@ -204,12 +206,24 @@
(start-substitute-query-threads)
(when (assoc-ref opts 'update-database)
(call-with-new-thread
(lambda ()
(run-sqitch)
(call-with-new-thread
(lambda ()
(run-sqitch)
(atomic-box-set! startup-completed #t))))
(for-each
(lambda (git-repository-details)
(when (fifth git-repository-details)
(simple-format #t "starting thread to poll ~A (~A)\n"
(second git-repository-details)
(third git-repository-details))
(start-thread-to-poll-git-repository
(first git-repository-details))))
(with-postgresql-connection
"poll-startup"
all-git-repositories))
(atomic-box-set! startup-completed #t)))
;; Provide some visual space between the startup output and the
;; server starting

View File

@ -0,0 +1,8 @@
-- Deploy guix-data-service:git_repositories_poll_interval to pg
BEGIN;
ALTER TABLE git_repositories
ADD COLUMN poll_interval INTEGER DEFAULT NULL;
COMMIT;

View File

@ -0,0 +1,7 @@
-- Revert guix-data-service:git_repositories_poll_interval from pg
BEGIN;
-- XXX Add DDLs here.
COMMIT;

View File

@ -96,3 +96,4 @@ blocked_builds_blocked_builds_blocked_derivation_output_details_set_id_2 2023-03
guix_revision_package_derivation_distribution_counts 2023-03-08T16:53:44Z Chris <chris@felis> # Add guix_revision_package_derivation_distribution_counts table
cascade_nar_foreign_keys 2023-08-01T09:42:33Z Chris <chris@felis> # Make it easier to delete nars entries
nar_indexes 2023-08-01T11:37:35Z Chris <chris@felis> # Add nar related indexes
git_repositories_poll_interval 2023-10-08T20:36:09Z Chris <chris@felis> # Add git_repositories.poll_interval

View File

@ -0,0 +1,7 @@
-- Verify guix-data-service:git_repositories_poll_interval on pg
BEGIN;
-- XXX Add verifications here.
ROLLBACK;