Add remote build support.

* src/cuirass/remote.scm: New file.
* src/cuirass/remote-server.scm: New file.
* src/cuirass/remote-worker.scm: New file.
* bin/remote-server.in: New file.
* bin/remote-worker.in: New file.
* Makefile.am (bin_SCRIPTS): Add new binaries,
(dist_pkgmodule_DATA): add new files,
(EXTRA_DIST): add new binaries,
(bin/remote-server, bin/remote-worker): new targets.
* .gitignore: Add new binaries.
* bin/cuirass.in (%options): Add "--build-remote" option,
(show-help): document it,
(main): honor it.
* src/cuirass/base.scm (with-build-offload-thread): New macro,
(%build-remote?, %build-offload-channel): new parameters,
(make-build-offload-thread): new procedure,
(build-derivations/offload): new procedure,
(restart-builds): use it to offload builds when %build-remote? is set,
(build-packages): ditto.
This commit is contained in:
Mathieu Othacehe 2020-12-02 11:13:33 +01:00
parent f65ef23ce6
commit ca7a7ca989
No known key found for this signature in database
GPG Key ID: 8354763531769CA6
19 changed files with 1740 additions and 181 deletions

2
.gitignore vendored
View File

@ -12,6 +12,8 @@
/bin/cuirass /bin/cuirass
/bin/cuirass-send-events /bin/cuirass-send-events
/bin/evaluate /bin/evaluate
/bin/remote-server
/bin/remote-worker
/build-aux/config.guess /build-aux/config.guess
/build-aux/config.sub /build-aux/config.sub
/build-aux/install-sh /build-aux/install-sh

View File

@ -22,7 +22,13 @@
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with Cuirass. If not, see <http://www.gnu.org/licenses/>. # along with Cuirass. If not, see <http://www.gnu.org/licenses/>.
bin_SCRIPTS = bin/cuirass bin/cuirass-send-events bin/evaluate bin_SCRIPTS = \
bin/cuirass \
bin/cuirass-send-events \
bin/evaluate \
bin/remote-server \
bin/remote-worker
noinst_SCRIPTS = pre-inst-env noinst_SCRIPTS = pre-inst-env
guilesitedir = $(datarootdir)/guile/site/@GUILE_EFFECTIVE_VERSION@ guilesitedir = $(datarootdir)/guile/site/@GUILE_EFFECTIVE_VERSION@
@ -48,6 +54,9 @@ dist_pkgmodule_DATA = \
src/cuirass/http.scm \ src/cuirass/http.scm \
src/cuirass/logging.scm \ src/cuirass/logging.scm \
src/cuirass/metrics.scm \ src/cuirass/metrics.scm \
src/cuirass/remote.scm \
src/cuirass/remote-server.scm \
src/cuirass/remote-worker.scm \
src/cuirass/send-events.scm \ src/cuirass/send-events.scm \
src/cuirass/ui.scm \ src/cuirass/ui.scm \
src/cuirass/utils.scm \ src/cuirass/utils.scm \
@ -86,7 +95,9 @@ dist_sql_DATA = \
src/sql/upgrade-14.sql \ src/sql/upgrade-14.sql \
src/sql/upgrade-15.sql \ src/sql/upgrade-15.sql \
src/sql/upgrade-16.sql \ src/sql/upgrade-16.sql \
src/sql/upgrade-17.sql src/sql/upgrade-17.sql \
src/sql/upgrade-18.sql \
src/sql/upgrade-19.sql
dist_css_DATA = \ dist_css_DATA = \
src/static/css/cuirass.css \ src/static/css/cuirass.css \
@ -167,6 +178,8 @@ EXTRA_DIST = \
bin/cuirass.in \ bin/cuirass.in \
bin/cuirass-send-events.in \ bin/cuirass-send-events.in \
bin/evaluate.in \ bin/evaluate.in \
bin/remote-server.in \
bin/remote-worker.in \
bootstrap \ bootstrap \
build-aux/guix.scm \ build-aux/guix.scm \
src/cuirass/config.scm.in \ src/cuirass/config.scm.in \
@ -227,6 +240,9 @@ generate_file = \
bin/cuirass: $(srcdir)/bin/cuirass.in bin/cuirass: $(srcdir)/bin/cuirass.in
bin/cuirass-send-events: $(srcdir)/bin/cuirass-send-events.in bin/cuirass-send-events: $(srcdir)/bin/cuirass-send-events.in
bin/evaluate: $(srcdir)/bin/evaluate.in bin/evaluate: $(srcdir)/bin/evaluate.in
bin/remote-server: $(srcdir)/bin/remote-server.in
bin/remote-worker: $(srcdir)/bin/remote-worker.in
$(bin_SCRIPTS): Makefile $(bin_SCRIPTS): Makefile
$(generate_file); chmod +x $@ $(generate_file); chmod +x $@
src/cuirass/config.scm: $(srcdir)/src/cuirass/config.scm.in Makefile src/cuirass/config.scm: $(srcdir)/src/cuirass/config.scm.in Makefile

View File

@ -59,6 +59,7 @@ exec ${GUILE:-@GUILE@} --no-auto-compile -e main -s "$0" "$@"
-I, --interval=N Wait N seconds between each poll -I, --interval=N Wait N seconds between each poll
-Q, --queue-size=N Set the writer queue size to N elements. -Q, --queue-size=N Set the writer queue size to N elements.
--log-queries=FILE Log SQL queries in FILE. --log-queries=FILE Log SQL queries in FILE.
--build-remote Use the remote build mechanism
--use-substitutes Allow usage of pre-built substitutes --use-substitutes Allow usage of pre-built substitutes
--record-events Record events for distribution --record-events Record events for distribution
--threads=N Use up to N kernel threads --threads=N Use up to N kernel threads
@ -77,6 +78,7 @@ exec ${GUILE:-@GUILE@} --no-auto-compile -e main -s "$0" "$@"
(listen (value #t)) (listen (value #t))
(interval (single-char #\I) (value #t)) (interval (single-char #\I) (value #t))
(queue-size (single-char #\Q) (value #t)) (queue-size (single-char #\Q) (value #t))
(build-remote (value #f))
(use-substitutes (value #f)) (use-substitutes (value #f))
(threads (value #t)) (threads (value #t))
(fallback (value #f)) (fallback (value #f))
@ -103,6 +105,7 @@ exec ${GUILE:-@GUILE@} --no-auto-compile -e main -s "$0" "$@"
(%package-database (option-ref opts 'database (%package-database))) (%package-database (option-ref opts 'database (%package-database)))
(%package-cachedir (%package-cachedir
(option-ref opts 'cache-directory (%package-cachedir))) (option-ref opts 'cache-directory (%package-cachedir)))
(%build-remote? (option-ref opts 'build-remote #f))
(%use-substitutes? (option-ref opts 'use-substitutes #f)) (%use-substitutes? (option-ref opts 'use-substitutes #f))
(%fallback? (option-ref opts 'fallback #f)) (%fallback? (option-ref opts 'fallback #f))
(%record-events? (option-ref opts 'record-events #f)) (%record-events? (option-ref opts 'record-events #f))
@ -146,84 +149,86 @@ exec ${GUILE:-@GUILE@} --no-auto-compile -e main -s "$0" "$@"
(lambda () (lambda ()
(with-database (with-database
(with-queue-writer-worker (with-queue-writer-worker
(and specfile (and specfile
(let ((new-specs (save-module-excursion (let ((new-specs (save-module-excursion
(lambda () (lambda ()
(set-current-module (make-user-module '())) (set-current-module
(primitive-load specfile))))) (make-user-module '()))
(for-each db-add-specification new-specs))) (primitive-load specfile)))))
(for-each db-add-specification new-specs)))
(when queries-file (when queries-file
(log-message "Enable SQL query logging.") (log-message "Enable SQL query logging.")
(db-log-queries queries-file)) (db-log-queries queries-file))
(if one-shot? (if one-shot?
(process-specs (db-get-specifications)) (process-specs (db-get-specifications))
(let ((exit-channel (make-channel))) (let ((exit-channel (make-channel)))
(start-watchdog) (start-watchdog)
(if (option-ref opts 'web #f) (if (option-ref opts 'web #f)
(begin (begin
(spawn-fiber (spawn-fiber
(essential-task (essential-task
'web exit-channel 'web exit-channel
(lambda () (lambda ()
(run-cuirass-server #:host host #:port port))) (run-cuirass-server #:host host
#:parallel? #t) #:port port)))
#:parallel? #t)
(spawn-fiber (spawn-fiber
(essential-task (essential-task
'monitor exit-channel 'monitor exit-channel
(lambda () (lambda ()
(while #t (while #t
(log-monitoring-stats) (log-monitoring-stats)
(sleep 600)))))) (sleep 600))))))
(begin (begin
(clear-build-queue) (clear-build-queue)
;; If Cuirass was stopped during an evaluation, ;; If Cuirass was stopped during an evaluation,
;; abort it. Builds that were not registered ;; abort it. Builds that were not registered
;; during this evaluation will be registered ;; during this evaluation will be registered
;; during the next evaluation. ;; during the next evaluation.
(db-abort-pending-evaluations) (db-abort-pending-evaluations)
;; First off, restart builds that had not ;; First off, restart builds that had not
;; completed or were not even started on a ;; completed or were not even started on a
;; previous run. ;; previous run.
(spawn-fiber (spawn-fiber
(essential-task (essential-task
'restart-builds exit-channel 'restart-builds exit-channel
(lambda () (lambda ()
(restart-builds)))) (restart-builds))))
(spawn-fiber (spawn-fiber
(essential-task (essential-task
'build exit-channel 'build exit-channel
(lambda () (lambda ()
(while #t (while #t
(process-specs (db-get-specifications)) (process-specs (db-get-specifications))
(log-message (log-message
"next evaluation in ~a seconds" interval) "next evaluation in ~a seconds" interval)
(sleep interval))))) (sleep interval)))))
(spawn-fiber (spawn-fiber
(essential-task (essential-task
'metrics exit-channel 'metrics exit-channel
(lambda () (lambda ()
(while #t (while #t
(with-time-logging (with-time-logging
"Metrics update" "Metrics update"
(db-update-metrics)) (db-update-metrics))
(sleep 3600))))) (sleep 3600)))))
(spawn-fiber (spawn-fiber
(essential-task (essential-task
'monitor exit-channel 'monitor exit-channel
(lambda () (lambda ()
(while #t (while #t
(log-monitoring-stats) (log-monitoring-stats)
(sleep 600))))))) (sleep 600)))))))
(primitive-exit (get-message exit-channel))))))) (primitive-exit (get-message exit-channel)))))))
;; Most of our code is I/O so preemption doesn't matter much (it ;; Most of our code is I/O so preemption doesn't matter much (it
;; could help while we're doing SQL requests, for instance, but it ;; could help while we're doing SQL requests, for instance, but it

29
bin/remote-server.in Normal file
View File

@ -0,0 +1,29 @@
#!/bin/sh
# -*- scheme -*-
# @configure_input@
#GUILE_LOAD_PATH="@PACKAGE_LOAD_PATH@${GUILE_LOAD_PATH:+:}$GUILE_LOAD_PATH"
#GUILE_LOAD_COMPILED_PATH="@PACKAGE_LOAD_COMPILED_PATH@${GUILE_LOAD_COMPILED_PATH:+:}$GUILE_LOAD_COMPILED_PATH"
exec ${GUILE:-@GUILE@} --no-auto-compile -e main -s "$0" "$@"
!#
;;; remote-server.in -- Remote build server.
;;; Copyright © 2020 Mathieu Othacehe <othacehe@gnu.org>
;;;
;;; This file is part of Cuirass.
;;;
;;; Cuirass is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, either version 3 of the License, or
;;; (at your option) any later version.
;;;
;;; Cuirass is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with Cuirass. If not, see <http://www.gnu.org/licenses/>.
(use-modules (cuirass remote-server))
(define* (main #:optional (args (command-line)))
(remote-server (cdr args)))

29
bin/remote-worker.in Normal file
View File

@ -0,0 +1,29 @@
#!/bin/sh
# -*- scheme -*-
# @configure_input@
#GUILE_LOAD_PATH="@PACKAGE_LOAD_PATH@${GUILE_LOAD_PATH:+:}$GUILE_LOAD_PATH"
#GUILE_LOAD_COMPILED_PATH="@PACKAGE_LOAD_COMPILED_PATH@${GUILE_LOAD_COMPILED_PATH:+:}$GUILE_LOAD_COMPILED_PATH"
exec ${GUILE:-@GUILE@} --no-auto-compile -e main -s "$0" "$@"
!#
;;; remote-worker.in -- Remote build worker.
;;; Copyright © 2020 Mathieu Othacehe <othacehe@gnu.org>
;;;
;;; This file is part of Cuirass.
;;;
;;; Cuirass is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, either version 3 of the License, or
;;; (at your option) any later version.
;;;
;;; Cuirass is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with Cuirass. If not, see <http://www.gnu.org/licenses/>.
(use-modules (cuirass remote-worker))
(define* (main #:optional (args (command-line)))
(remote-worker (cdr args)))

View File

@ -22,8 +22,10 @@
(define-module (cuirass base) (define-module (cuirass base)
#:use-module (fibers) #:use-module (fibers)
#:use-module (fibers channels)
#:use-module (cuirass logging) #:use-module (cuirass logging)
#:use-module (cuirass database) #:use-module (cuirass database)
#:use-module (cuirass remote)
#:use-module (cuirass utils) #:use-module (cuirass utils)
#:use-module ((cuirass config) #:select (%localstatedir)) #:use-module ((cuirass config) #:select (%localstatedir))
#:use-module (gnu packages) #:use-module (gnu packages)
@ -36,9 +38,13 @@
#:use-module ((guix config) #:select (%state-directory)) #:use-module ((guix config) #:select (%state-directory))
#:use-module (git) #:use-module (git)
#:use-module (ice-9 binary-ports) #:use-module (ice-9 binary-ports)
#:use-module ((ice-9 suspendable-ports)
#:select (current-read-waiter
current-write-waiter))
#:use-module (ice-9 format) #:use-module (ice-9 format)
#:use-module (ice-9 match) #:use-module (ice-9 match)
#:use-module (ice-9 popen) #:use-module (ice-9 popen)
#:use-module (ice-9 ports internal)
#:use-module (ice-9 rdelim) #:use-module (ice-9 rdelim)
#:use-module (ice-9 receive) #:use-module (ice-9 receive)
#:use-module (ice-9 regex) #:use-module (ice-9 regex)
@ -58,6 +64,8 @@
fetch-inputs fetch-inputs
compile compile
evaluate evaluate
build-derivations&
set-build-successful!
clear-build-queue clear-build-queue
cancel-old-builds cancel-old-builds
restart-builds restart-builds
@ -70,6 +78,7 @@
%package-cachedir %package-cachedir
%gc-root-directory %gc-root-directory
%gc-root-ttl %gc-root-ttl
%build-remote?
%use-substitutes? %use-substitutes?
%fallback?)) %fallback?))
@ -102,6 +111,10 @@
(define time-monotonic time-tai)) (define time-monotonic time-tai))
(else #t)) (else #t))
(define %build-remote?
;; Define whether to use the remote build mechanism.
(make-parameter #f))
(define %use-substitutes? (define %use-substitutes?
;; Define whether to use substitutes ;; Define whether to use substitutes
(make-parameter #f)) (make-parameter #f))
@ -429,7 +442,7 @@ Essentially this procedure inverts the inversion-of-control that
(lambda _ (lambda _
(close-port output))))) (close-port output)))))
(values (non-blocking-port input) (values input
(lambda () (lambda ()
(match (atomic-box-ref result) (match (atomic-box-ref result)
((? condition? c) ((? condition? c)
@ -446,7 +459,7 @@ Essentially this procedure inverts the inversion-of-control that
;; Our shuffling algorithm is simple: we sort by .drv file name. :-) ;; Our shuffling algorithm is simple: we sort by .drv file name. :-)
(sort drv string<?)) (sort drv string<?))
(define (set-build-successful! drv) (define* (set-build-successful! drv #:optional log)
"Update the build status of DRV as successful and register any eventual "Update the build status of DRV as successful and register any eventual
build products." build products."
(let* ((build (db-get-build drv)) (let* ((build (db-get-build drv))
@ -456,7 +469,8 @@ build products."
(when (and spec build) (when (and spec build)
(create-build-outputs build (create-build-outputs build
(assq-ref spec #:build-outputs)))) (assq-ref spec #:build-outputs))))
(db-update-build-status! drv (build-status succeeded))) (db-update-build-status! drv (build-status succeeded)
#:log-file log))
(define (update-build-statuses! store lst) (define (update-build-statuses! store lst)
"Update the build status of the derivations listed in LST, which have just "Update the build status of the derivations listed in LST, which have just
@ -584,7 +598,7 @@ updating the database accordingly."
(log-message "bogus build-started event for '~a'" drv))) (log-message "bogus build-started event for '~a'" drv)))
(('build-remote drv host _ ...) (('build-remote drv host _ ...)
(log-message "'~a' offloaded to '~a'" drv host) (log-message "'~a' offloaded to '~a'" drv host)
(db-update-build-machine! drv host)) (db-update-build-worker! drv host))
(('build-succeeded drv _ ...) (('build-succeeded drv _ ...)
(if (valid? drv) (if (valid? drv)
(begin (begin
@ -642,7 +656,8 @@ started)."
;; Those in VALID can be restarted. If some of them were built in the ;; Those in VALID can be restarted. If some of them were built in the
;; meantime behind our back, that's fine: 'spawn-builds' will DTRT. ;; meantime behind our back, that's fine: 'spawn-builds' will DTRT.
(log-message "restarting ~a pending builds" (length valid)) (log-message "restarting ~a pending builds" (length valid))
(spawn-builds store valid) (unless (%build-remote?)
(spawn-builds store valid))
(log-message "done with restarted builds")))) (log-message "done with restarted builds"))))
(define (create-build-outputs build product-specs) (define (create-build-outputs build product-specs)
@ -682,16 +697,19 @@ by PRODUCT-SPECS."
(define (build-packages store jobs eval-id) (define (build-packages store jobs eval-id)
"Build JOBS and return a list of Build results." "Build JOBS and return a list of Build results."
(define derivations (define derivations
(with-time-logging (let* ((name (db-get-evaluation-specification eval-id))
(format #f "evaluation ~a registration" eval-id) (specification (db-get-specification name)))
(db-register-builds jobs eval-id))) (with-time-logging
(format #f "evaluation ~a registration" eval-id)
(db-register-builds jobs eval-id specification))))
(log-message "evaluation ~a registered ~a new derivations" (log-message "evaluation ~a registered ~a new derivations"
eval-id (length derivations)) eval-id (length derivations))
(db-set-evaluation-status eval-id (db-set-evaluation-status eval-id
(evaluation-status succeeded)) (evaluation-status succeeded))
(spawn-builds store derivations) (unless (%build-remote?)
(spawn-builds store derivations))
(let* ((results (filter-map (cut db-get-build <>) derivations)) (let* ((results (filter-map (cut db-get-build <>) derivations))
(status (map (cut assq-ref <> #:status) results)) (status (map (cut assq-ref <> #:status) results))

View File

@ -24,6 +24,7 @@
(define-module (cuirass database) (define-module (cuirass database)
#:use-module (cuirass logging) #:use-module (cuirass logging)
#:use-module (cuirass config) #:use-module (cuirass config)
#:use-module (cuirass remote)
#:use-module (cuirass utils) #:use-module (cuirass utils)
#:use-module (ice-9 match) #:use-module (ice-9 match)
#:use-module (ice-9 format) #:use-module (ice-9 format)
@ -60,7 +61,7 @@
db-add-build-product db-add-build-product
db-register-builds db-register-builds
db-update-build-status! db-update-build-status!
db-update-build-machine! db-update-build-worker!
db-get-output db-get-output
db-get-inputs db-get-inputs
db-get-build db-get-build
@ -82,6 +83,9 @@
db-get-evaluation-specification db-get-evaluation-specification
db-get-build-product-path db-get-build-product-path
db-get-build-products db-get-build-products
db-add-worker
db-get-workers
db-clear-workers
db-get-evaluation-summary db-get-evaluation-summary
db-get-checkouts db-get-checkouts
read-sql-file read-sql-file
@ -92,6 +96,7 @@
;; Constants. ;; Constants.
SQLITE_CONSTRAINT_PRIMARYKEY SQLITE_CONSTRAINT_PRIMARYKEY
SQLITE_CONSTRAINT_UNIQUE SQLITE_CONSTRAINT_UNIQUE
SQLITE_BUSY_SNAPSHOT
;; Parameters. ;; Parameters.
%package-database %package-database
%package-schema-file %package-schema-file
@ -106,6 +111,9 @@
with-database with-database
with-queue-writer-worker)) with-queue-writer-worker))
;; Maximum priority for a Build or Specification.
(define max-priority 9)
(define (%sqlite-exec db sql . args) (define (%sqlite-exec db sql . args)
"Evaluate the given SQL query with the given ARGS. Return the list of "Evaluate the given SQL query with the given ARGS. Return the list of
rows." rows."
@ -441,7 +449,7 @@ table."
(sqlite-exec db "\ (sqlite-exec db "\
INSERT OR IGNORE INTO Specifications (name, load_path_inputs, \ INSERT OR IGNORE INTO Specifications (name, load_path_inputs, \
package_path_inputs, proc_input, proc_file, proc, proc_args, \ package_path_inputs, proc_input, proc_file, proc, proc_args, \
build_outputs) \ build_outputs, priority) \
VALUES (" VALUES ("
(assq-ref spec #:name) ", " (assq-ref spec #:name) ", "
(assq-ref spec #:load-path-inputs) ", " (assq-ref spec #:load-path-inputs) ", "
@ -450,7 +458,8 @@ build_outputs) \
(assq-ref spec #:proc-file) ", " (assq-ref spec #:proc-file) ", "
(symbol->string (assq-ref spec #:proc)) ", " (symbol->string (assq-ref spec #:proc)) ", "
(assq-ref spec #:proc-args) ", " (assq-ref spec #:proc-args) ", "
(assq-ref spec #:build-outputs) ");") (assq-ref spec #:build-outputs) ", "
(or (assq-ref spec #:priority) max-priority) ");")
(let ((spec-id (last-insert-rowid db))) (let ((spec-id (last-insert-rowid db)))
(for-each (lambda (input) (for-each (lambda (input)
(db-add-input (assq-ref spec #:name) input)) (db-add-input (assq-ref spec #:name) input))
@ -504,7 +513,7 @@ SELECT * FROM Specifications ORDER BY name DESC;")))
(match rows (match rows
(() specs) (() specs)
((#(name load-path-inputs package-path-inputs proc-input proc-file proc ((#(name load-path-inputs package-path-inputs proc-input proc-file proc
proc-args build-outputs) proc-args build-outputs priority)
. rest) . rest)
(loop rest (loop rest
(cons `((#:name . ,name) (cons `((#:name . ,name)
@ -518,7 +527,8 @@ SELECT * FROM Specifications ORDER BY name DESC;")))
(#:proc-args . ,(with-input-from-string proc-args read)) (#:proc-args . ,(with-input-from-string proc-args read))
(#:inputs . ,(db-get-inputs name)) (#:inputs . ,(db-get-inputs name))
(#:build-outputs . (#:build-outputs .
,(with-input-from-string build-outputs read))) ,(with-input-from-string build-outputs read))
(#:priority . ,priority))
specs))))))) specs)))))))
(define-enumeration evaluation-status (define-enumeration evaluation-status
@ -622,15 +632,19 @@ string."
;; Extended error codes (see <sqlite3.h>). ;; Extended error codes (see <sqlite3.h>).
;; XXX: This should be defined by (sqlite3). ;; XXX: This should be defined by (sqlite3).
(define SQLITE_BUSY 5)
(define SQLITE_CONSTRAINT 19) (define SQLITE_CONSTRAINT 19)
(define SQLITE_CONSTRAINT_PRIMARYKEY (define SQLITE_CONSTRAINT_PRIMARYKEY
(logior SQLITE_CONSTRAINT (ash 6 8))) (logior SQLITE_CONSTRAINT (ash 6 8)))
(define SQLITE_CONSTRAINT_UNIQUE (define SQLITE_CONSTRAINT_UNIQUE
(logior SQLITE_CONSTRAINT (ash 8 8))) (logior SQLITE_CONSTRAINT (ash 8 8)))
(define SQLITE_BUSY_SNAPSHOT
(logior SQLITE_BUSY (ash 2 8)))
(define-enumeration build-status (define-enumeration build-status
;; Build status as expected by Hydra's API. Note: the negative values are ;; Build status as expected by Hydra's API. Note: the negative values are
;; Cuirass' own extensions. ;; Cuirass' own extensions.
(submitted -3)
(scheduled -2) (scheduled -2)
(started -1) (started -1)
(succeeded 0) (succeeded 0)
@ -662,7 +676,7 @@ Return #f otherwise. BUILD outputs are stored in the OUTPUTS table."
(with-db-writer-worker-thread/force db (with-db-writer-worker-thread/force db
(sqlite-exec db " (sqlite-exec db "
INSERT INTO Builds (derivation, evaluation, job_name, system, nix_name, log, INSERT INTO Builds (derivation, evaluation, job_name, system, nix_name, log,
status, timestamp, starttime, stoptime) status, priority, max_silent, timeout, timestamp, starttime, stoptime)
VALUES (" VALUES ("
(assq-ref build #:derivation) ", " (assq-ref build #:derivation) ", "
(assq-ref build #:eval-id) ", " (assq-ref build #:eval-id) ", "
@ -672,9 +686,12 @@ VALUES ("
(assq-ref build #:log) ", " (assq-ref build #:log) ", "
(or (assq-ref build #:status) (or (assq-ref build #:status)
(build-status scheduled)) ", " (build-status scheduled)) ", "
(or (assq-ref build #:timestamp) 0) ", " (assq-ref build #:priority) ", "
(or (assq-ref build #:starttime) 0) ", " (or (assq-ref build #:max-silent) 0) ", "
(or (assq-ref build #:stoptime) 0) ");") (or (assq-ref build #:timeout) 0) ", "
(or (assq-ref build #:timestamp) 0) ", "
(or (assq-ref build #:starttime) 0) ", "
(or (assq-ref build #:stoptime) 0) ");")
(let* ((derivation (assq-ref build #:derivation)) (let* ((derivation (assq-ref build #:derivation))
(outputs (assq-ref build #:outputs)) (outputs (assq-ref build #:outputs))
(new-outputs (filter-map (cut db-add-output derivation <>) (new-outputs (filter-map (cut db-add-output derivation <>)
@ -702,7 +719,7 @@ path) VALUES ("
(assq-ref product #:path) ");") (assq-ref product #:path) ");")
(last-insert-rowid db))) (last-insert-rowid db)))
(define (db-register-builds jobs eval-id) (define (db-register-builds jobs eval-id specification)
(define (new-outputs? outputs) (define (new-outputs? outputs)
(let ((new-outputs (let ((new-outputs
(filter-map (match-lambda (filter-map (match-lambda
@ -712,16 +729,23 @@ path) VALUES ("
outputs))) outputs)))
(not (null? new-outputs)))) (not (null? new-outputs))))
(define (build-priority priority)
(let ((spec-priority (assq-ref specification #:priority)))
(+ (* spec-priority 10) priority)))
(define (register job) (define (register job)
(let* ((name (assq-ref job #:job-name)) (let* ((name (assq-ref job #:job-name))
(drv (assq-ref job #:derivation)) (drv (assq-ref job #:derivation))
(job-name (assq-ref job #:job-name)) (job-name (assq-ref job #:job-name))
(system (assq-ref job #:system)) (system (assq-ref job #:system))
(nix-name (assq-ref job #:nix-name)) (nix-name (assq-ref job #:nix-name))
(log (assq-ref job #:log)) (log (assq-ref job #:log))
(period (assq-ref job #:period)) (period (assq-ref job #:period))
(outputs (assq-ref job #:outputs)) (priority (or (assq-ref job #:priority) max-priority))
(cur-time (time-second (current-time time-utc)))) (max-silent (assq-ref job #:max-silent-time))
(timeout (assq-ref job #:timeout))
(outputs (assq-ref job #:outputs))
(cur-time (time-second (current-time time-utc))))
(and (new-outputs? outputs) (and (new-outputs? outputs)
(let ((build `((#:derivation . ,drv) (let ((build `((#:derivation . ,drv)
(#:eval-id . ,eval-id) (#:eval-id . ,eval-id)
@ -734,12 +758,15 @@ path) VALUES ("
(#:log . ,(or log "")) (#:log . ,(or log ""))
(#:status . ,(build-status scheduled)) (#:status . ,(build-status scheduled))
(#:priority . ,(build-priority priority))
(#:max-silent . ,max-silent)
(#:timeout . ,timeout)
(#:outputs . ,outputs) (#:outputs . ,outputs)
(#:timestamp . ,cur-time) (#:timestamp . ,cur-time)
(#:starttime . 0) (#:starttime . 0)
(#:stoptime . 0)))) (#:stoptime . 0))))
(if period (if period
(let* ((spec (db-get-evaluation-specification eval-id)) (let* ((spec (assq-ref specification #:name))
(time (time
(db-get-time-since-previous-build job-name spec)) (db-get-time-since-previous-build job-name spec))
(add-build? (cond (add-build? (cond
@ -803,10 +830,10 @@ log file for DRV."
(#:event . ,(assq-ref status-names (#:event . ,(assq-ref status-names
status))))))))) status)))))))))
(define* (db-update-build-machine! drv machine) (define* (db-update-build-worker! drv worker)
"Update the database so that DRV's machine is MACHINE." "Update the database so that DRV's worker is WORKER."
(with-db-writer-worker-thread db (with-db-writer-worker-thread db
(sqlite-exec db "UPDATE Builds SET machine=" machine (sqlite-exec db "UPDATE Builds SET worker=" worker
"WHERE derivation=" drv ";"))) "WHERE derivation=" drv ";")))
(define (db-get-output path) (define (db-get-output path)
@ -955,6 +982,8 @@ CASE WHEN :borderlowid IS NULL THEN
;; before those in 'scheduled' state (-2). ;; before those in 'scheduled' state (-2).
(('order . 'status+submission-time) (('order . 'status+submission-time)
"Builds.status DESC, Builds.timestamp DESC, Builds.rowid ASC") "Builds.status DESC, Builds.timestamp DESC, Builds.rowid ASC")
(('order . 'priority+timestamp)
"Builds.priority DESC, Builds.timestamp ASC")
(_ "Builds.rowid DESC")))) (_ "Builds.rowid DESC"))))
;; XXX: Make sure that all filters are covered by an index. ;; XXX: Make sure that all filters are covered by an index.
@ -965,10 +994,12 @@ CASE WHEN :borderlowid IS NULL THEN
(derivation . "Builds.derivation = :derivation") (derivation . "Builds.derivation = :derivation")
(job . "Builds.job_name = :job") (job . "Builds.job_name = :job")
(system . "Builds.system = :system") (system . "Builds.system = :system")
(worker . "Builds.worker = :worker")
(evaluation . "Builds.evaluation = :evaluation") (evaluation . "Builds.evaluation = :evaluation")
(status . ,(match (assq-ref filters 'status) (status . ,(match (assq-ref filters 'status)
(#f #f) (#f #f)
('done "Builds.status >= 0") ('done "Builds.status >= 0")
('scheduled "Builds.status = -2")
('started "Builds.status = -1") ('started "Builds.status = -1")
('pending "Builds.status < 0") ('pending "Builds.status < 0")
('succeeded "Builds.status = 0") ('succeeded "Builds.status = 0")
@ -1031,7 +1062,8 @@ GROUP_CONCAT(Outputs.name), GROUP_CONCAT(Outputs.path),
GROUP_CONCAT(BP.rowid), GROUP_CONCAT(BP.type), GROUP_CONCAT(BP.file_size), GROUP_CONCAT(BP.rowid), GROUP_CONCAT(BP.type), GROUP_CONCAT(BP.file_size),
GROUP_CONCAT(BP.checksum), GROUP_CONCAT(BP.path) FROM GROUP_CONCAT(BP.checksum), GROUP_CONCAT(BP.path) FROM
(SELECT Builds.derivation, Builds.rowid, Builds.timestamp, Builds.starttime, (SELECT Builds.derivation, Builds.rowid, Builds.timestamp, Builds.starttime,
Builds.stoptime, Builds.log, Builds.status, Builds.job_name, Builds.stoptime, Builds.log, Builds.status, Builds.priority,
Builds.max_silent, Builds.timeout, Builds.job_name,
Builds.system, Builds.nix_name, Builds.evaluation, Builds.system, Builds.nix_name, Builds.evaluation,
Specifications.name Specifications.name
FROM Builds FROM Builds
@ -1070,7 +1102,8 @@ ORDER BY ~a;"
(sqlite-fold-right (sqlite-fold-right
(lambda (row result) (lambda (row result)
(match row (match row
(#(derivation id timestamp starttime stoptime log status job-name (#(derivation id timestamp starttime stoptime log status
priority max-silent timeout job-name
system nix-name eval-id specification system nix-name eval-id specification
outputs-name outputs-path outputs-name outputs-path
products-id products-type products-file-size products-id products-type products-file-size
@ -1082,6 +1115,9 @@ ORDER BY ~a;"
(#:stoptime . ,stoptime) (#:stoptime . ,stoptime)
(#:log . ,log) (#:log . ,log)
(#:status . ,status) (#:status . ,status)
(#:priority . ,priority)
(#:max-silent . ,max-silent)
(#:timeout . ,timeout)
(#:job-name . ,job-name) (#:job-name . ,job-name)
(#:system . ,system) (#:system . ,system)
(#:nix-name . ,nix-name) (#:nix-name . ,nix-name)
@ -1413,3 +1449,38 @@ WHERE build = " build-id))
(#:checksum . ,checksum) (#:checksum . ,checksum)
(#:path . ,path)) (#:path . ,path))
products))))))) products)))))))
(define (db-add-worker worker)
"Insert WORKER into Worker table."
(with-db-writer-worker-thread db
(sqlite-exec db "\
INSERT OR REPLACE INTO Workers (name, address, systems, last_seen)
VALUES ("
(worker-name worker) ", "
(worker-address worker) ", "
(string-join (worker-systems worker) ",") ", "
(worker-last-seen worker) ");")
(last-insert-rowid db)))
(define (db-get-workers)
"Return the workers in Workers table."
(with-db-worker-thread db
(let loop ((rows (sqlite-exec db "
SELECT name, address, systems, last_seen from Workers"))
(workers '()))
(match rows
(() (reverse workers))
((#(name address systems last-seen)
. rest)
(loop rest
(cons (worker
(name name)
(address address)
(systems (string-split systems #\,))
(last-seen last-seen))
workers)))))))
(define (db-clear-workers)
"Remove all workers from Workers table."
(with-db-writer-worker-thread db
(sqlite-exec db "DELETE FROM Workers;")))

View File

@ -28,6 +28,7 @@
#:use-module (cuirass metrics) #:use-module (cuirass metrics)
#:use-module (cuirass utils) #:use-module (cuirass utils)
#:use-module (cuirass logging) #:use-module (cuirass logging)
#:use-module (cuirass remote)
#:use-module (srfi srfi-1) #:use-module (srfi srfi-1)
#:use-module (srfi srfi-11) #:use-module (srfi srfi-11)
#:use-module (srfi srfi-26) #:use-module (srfi srfi-26)
@ -446,26 +447,11 @@ Hydra format."
(#:link . ,(string-append "/jobset/" (assq-ref build #:specification))))))) (#:link . ,(string-append "/jobset/" (assq-ref build #:specification)))))))
(respond-build-not-found id)))) (respond-build-not-found id))))
(('GET "build" (= string->number id) "log" "raw") (('GET "build" (= string->number id) "log" "raw")
(let ((build (and id (db-get-build id)))) (let* ((build (and id (db-get-build id)))
(if build (log (and build (assq-ref build #:log))))
(match (assq-ref build #:outputs) (if (and log (file-exists? log))
(((_ (#:path . (? string? output))) _ ...) (respond-gzipped-file log)
;; Redirect to a /log URL, which is assumed to be served (respond-not-found (uri->string (request-uri request))))))
;; by 'guix publish'.
(let ((uri (string->uri-reference
(string-append "/log/"
(basename output)))))
(respond (build-response #:code 302
#:headers `((location . ,uri)))
#:body "")))
(()
;; Not entry for ID in the 'Outputs' table.
(respond-json-with-error
500
(format #f "Outputs of build ~a are unknown." id)))
(#f
(respond-build-not-found id)))
(respond-build-not-found id))))
(('GET "output" id) (('GET "output" id)
(let ((output (db-get-output (let ((output (db-get-output
(string-append (%store-prefix) "/" id)))) (string-append (%store-prefix) "/" id))))
@ -661,6 +647,21 @@ Hydra format."
(respond-json-with-error 500 "No build found."))) (respond-json-with-error 500 "No build found.")))
(respond-json-with-error 500 "Query parameter not provided.")))) (respond-json-with-error 500 "Query parameter not provided."))))
(('GET "workers")
(respond-html
(html-page
"Workers status"
(let ((workers (db-get-workers)))
(workers-status
workers
(map (lambda (worker)
(let ((name (worker-name worker)))
(db-get-builds `((worker . ,name)
(status . started)
(order . status+submission-time)))))
workers)))
'())))
(('GET "metrics") (('GET "metrics")
(respond-html (respond-html
(metrics-page))) (metrics-page)))

View File

@ -329,42 +329,44 @@ timestamp) VALUES ("
(define (db-update-metrics) (define (db-update-metrics)
"Compute and update all available metrics in database." "Compute and update all available metrics in database."
(with-db-writer-worker-thread/force db (with-db-writer-worker-thread/force db
;; We can not update all evaluations metrics for performance reasons. (catch-sqlite-error
;; Limit to the evaluations that were added during the past three days. ;; We can not update all evaluations metrics for performance reasons.
(let ((specifications ;; Limit to the evaluations that were added during the past three days.
(map (cut assq-ref <> #:name) (db-get-specifications))) (let ((specifications
(evaluations (db-latest-evaluations))) (map (cut assq-ref <> #:name) (db-get-specifications)))
(sqlite-exec db "BEGIN TRANSACTION;") (evaluations (db-latest-evaluations)))
(sqlite-exec db "BEGIN TRANSACTION;")
(db-update-metric 'builds-per-day) (db-update-metric 'builds-per-day)
(db-update-metric 'new-derivations-per-day) (db-update-metric 'new-derivations-per-day)
(db-update-metric 'pending-builds) (db-update-metric 'pending-builds)
;; Update specification related metrics. ;; Update specification related metrics.
(for-each (lambda (spec) (for-each (lambda (spec)
(db-update-metric (db-update-metric
'average-10-last-eval-duration-per-spec spec) 'average-10-last-eval-duration-per-spec spec)
(db-update-metric (db-update-metric
'average-100-last-eval-duration-per-spec spec) 'average-100-last-eval-duration-per-spec spec)
(db-update-metric (db-update-metric
'average-eval-duration-per-spec spec) 'average-eval-duration-per-spec spec)
(db-update-metric (db-update-metric
'percentage-failure-10-last-eval-per-spec spec) 'percentage-failure-10-last-eval-per-spec spec)
(db-update-metric (db-update-metric
'percentage-failure-100-last-eval-per-spec spec) 'percentage-failure-100-last-eval-per-spec spec)
(db-update-metric (db-update-metric
'percentage-failed-eval-per-spec spec)) 'percentage-failed-eval-per-spec spec))
specifications) specifications)
;; Update evaluation related metrics. ;; Update evaluation related metrics.
(for-each (lambda (evaluation) (for-each (lambda (evaluation)
(db-update-metric (db-update-metric
'average-eval-build-start-time evaluation) 'average-eval-build-start-time evaluation)
(db-update-metric (db-update-metric
'average-eval-build-complete-time evaluation) 'average-eval-build-complete-time evaluation)
(db-update-metric (db-update-metric
'evaluation-completion-speed evaluation)) 'evaluation-completion-speed evaluation))
evaluations) evaluations)
(sqlite-exec db "COMMIT;")))) (sqlite-exec db "COMMIT;"))
(on SQLITE_BUSY_SNAPSHOT => #f))))

View File

@ -0,0 +1,497 @@
;;; remote-server.scm -- Remote build server.
;;; Copyright © 2020 Mathieu Othacehe <othacehe@gnu.org>
;;;
;;; This file is part of Cuirass.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
(define-module (cuirass remote-server)
#:use-module (cuirass base)
#:use-module (cuirass config)
#:use-module (cuirass database)
#:use-module (cuirass logging)
#:use-module (cuirass remote)
#:use-module (cuirass utils)
#:use-module (gcrypt pk-crypto)
#:use-module (guix avahi)
#:use-module (guix base32)
#:use-module (guix base64)
#:use-module (guix config)
#:use-module (guix derivations)
#:use-module (guix records)
#:use-module (guix packages)
#:use-module (guix pki)
#:use-module (guix scripts)
#:use-module ((guix store)
#:select (current-build-output-port
ensure-path
store-protocol-error?
with-store))
#:use-module (guix ui)
#:use-module (guix utils)
#:use-module (guix workers)
#:use-module (guix build download)
#:use-module (guix build syscalls)
#:use-module ((guix build utils) #:select (mkdir-p))
#:use-module (gcrypt hash)
#:use-module (gcrypt pk-crypto)
#:use-module (simple-zmq)
#:use-module (rnrs bytevectors)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-26)
#:use-module (srfi srfi-34)
#:use-module (srfi srfi-37)
#:use-module (ice-9 atomic)
#:use-module (ice-9 match)
#:use-module (ice-9 q)
#:use-module (ice-9 rdelim)
#:use-module (ice-9 regex)
#:use-module (ice-9 threads)
#:export (remote-server))
;; Indicate if the process has to be stopped.
(define %stop-process?
(make-atomic-box #f))
(define %cache-directory
(make-parameter #f))
(define %trigger-substitute-url
(make-parameter #f))
(define %private-key
(make-parameter #f))
(define %public-key
(make-parameter #f))
(define service-name
"Cuirass remote server")
(define (show-help)
(format #t (G_ "Usage: remote-server [OPTION]...
Start a remote build server.\n"))
(display (G_ "
-b, --backend-port=PORT listen worker connections on PORT"))
(display (G_ "
-l, --log-port=PORT listen build logs on PORT"))
(display (G_ "
-p, --publish-port=PORT publish substitutes on PORT"))
(display (G_ "
-D, --database=DB Use DB to read and store build results"))
(display (G_ "
-c, --cache=DIRECTORY cache built items to DIRECTORY"))
(display (G_ "
-t, --trigger-substitute-url=URL
trigger substitute baking at URL"))
(display (G_ "
-u, --user=USER change privileges to USER as soon as possible"))
(display (G_ "
--public-key=FILE use FILE as the public key for signatures"))
(display (G_ "
--private-key=FILE use FILE as the private key for signatures"))
(newline)
(display (G_ "
-h, --help display this help and exit"))
(display (G_ "
-V, --version display version information and exit"))
(newline)
(show-bug-report-information))
(define %options
(list (option '(#\h "help") #f #f
(lambda _
(show-help)
(exit 0)))
(option '(#\V "version") #f #f
(lambda _
(show-version-and-exit "guix publish")))
(option '(#\b "backend-port") #t #f
(lambda (opt name arg result)
(alist-cons 'backend-port (string->number* arg) result)))
(option '(#\l "log-port") #t #f
(lambda (opt name arg result)
(alist-cons 'log-port (string->number* arg) result)))
(option '(#\p "publish-port") #t #f
(lambda (opt name arg result)
(alist-cons 'publish-port (string->number* arg) result)))
(option '(#\D "database") #t #f
(lambda (opt name arg result)
(alist-cons 'database arg result)))
(option '(#\c "cache") #t #f
(lambda (opt name arg result)
(alist-cons 'cache arg result)))
(option '(#\t "trigger-substitute-url") #t #f
(lambda (opt name arg result)
(alist-cons 'trigger-substitute-url arg result)))
(option '(#\u "user") #t #f
(lambda (opt name arg result)
(alist-cons 'user arg result)))
(option '("public-key") #t #f
(lambda (opt name arg result)
(alist-cons 'public-key-file arg result)))
(option '("private-key") #t #f
(lambda (opt name arg result)
(alist-cons 'private-key-file arg result)))))
(define %default-options
`((backend-port . 5555)
(log-port . 5556)
(publish-port . 5557)
(public-key-file . ,%public-key-file)
(private-key-file . ,%private-key-file)))
;;;
;;; Build workers.
;;;
(define %workers
;; Set of connected workers.
(make-hash-table))
(define (pop-build name)
(define (random-system systems)
(list-ref systems (random (length systems))))
(let ((worker (hash-ref %workers name)))
(and worker
(let ((system (random-system
(worker-systems worker))))
(match (db-get-builds `((status . scheduled)
(system . ,system)
(order . priority+timestamp)
(nr . 1)))
((build) build)
(() #f))))))
(define (remove-unresponsive-workers!)
(let ((unresponsive
(hash-fold (lambda (key value old)
(let* ((last-seen (worker-last-seen value))
(diff (- (current-time) last-seen)))
(if (> diff (%worker-timeout))
(cons key old)
old)))
'()
%workers)))
(for-each (lambda (worker)
(hash-remove! %workers worker))
unresponsive)))
(define* (read-worker-exp exp #:key reply-worker)
"Read the given EXP sent by a worker. REPLY-WORKER is a procedure that can
be used to reply to the worker."
(define (update-workers! base-worker proc)
(let* ((worker* (worker
(inherit (sexp->worker base-worker))
(last-seen (current-time))))
(name (worker-name worker*)))
(proc name)
(hash-set! %workers name worker*)))
(match (zmq-read-message exp)
(('worker-ready worker)
(update-workers! worker
(lambda (name)
(log-message (G_ "Worker `~a' is ready.") name))))
(('worker-request-work name)
(let ((build (pop-build name)))
(if build
(let ((derivation (assq-ref build #:derivation))
(priority (assq-ref build #:priority))
(timeout (assq-ref build #:timeout))
(max-silent (assq-ref build #:max-silent)))
(db-update-build-worker! derivation name)
(db-update-build-status! derivation (build-status submitted))
(reply-worker
(zmq-build-request-message derivation
#:priority priority
#:timeout timeout
#:max-silent max-silent)))
(reply-worker
(zmq-no-build-message)))))
(('worker-ping worker)
(update-workers! worker (const #t))
(db-clear-workers)
(hash-for-each (lambda (key value)
(db-add-worker value))
%workers))
(('build-started ('drv drv) ('worker worker))
(let ((log-file (log-path (%cache-directory) drv)))
(log-message "build started: '~a' on ~a." drv worker)
(db-update-build-worker! drv worker)
(db-update-build-status! drv (build-status started)
#:log-file log-file)))))
;;;
;;; Fetch workers.
;;;
(define (zmq-fetch-workers-endpoint)
"inproc://fetch-workers")
(define (zmq-fetch-worker-socket)
"Return a socket used to communicate with the fetch workers."
(let ((socket (zmq-create-socket %zmq-context ZMQ_PULL))
(endpoint (zmq-fetch-workers-endpoint)))
(zmq-connect socket endpoint)
socket))
(define (url-fetch* url file)
(parameterize ((current-output-port (%make-void-port "w"))
(current-error-port (%make-void-port "w")))
(url-fetch url file)))
(define (publish-narinfo-url publish-url store-hash)
"Return the URL of STORE-HASH narinfo file on PUBLISH-URL."
(let ((hash (and=> (string-index store-hash #\-)
(cut string-take store-hash <>))))
(format #f "~a/~a.narinfo" publish-url hash)))
(define (ensure-path* store output)
(guard (c ((store-protocol-error? c)
(log-message "Failed to add ~a to store." output)
#f))
(ensure-path store output)))
(define (add-to-store outputs url)
"Add the OUTPUTS that are available from the substitute server at URL to the
store."
(parameterize ((current-build-output-port (%make-void-port "w")))
(with-store store
(set-build-options* store url)
(for-each (lambda (output)
(ensure-path* store output))
(map derivation-output-path outputs)))))
(define (trigger-substitutes-baking outputs url)
(for-each (lambda (output)
(let* ((path (derivation-output-path output))
(store-hash (strip-store-prefix path))
(narinfo-url (publish-narinfo-url url store-hash)))
(call-with-temporary-output-file
(lambda (tmp-file port)
(url-fetch* narinfo-url tmp-file)))))
outputs))
(define (need-fetching? message)
"Return #t if the received MESSAGE implies that some output fetching is
required and #f otherwise."
(match (zmq-read-message message)
(('build-succeeded _ ...)
#t)
(('build-failed _ ...)
#t)
(else #f)))
(define* (run-fetch message)
"Read MESSAGE and download the corresponding build outputs. If
%CACHE-DIRECTORY is set, download the matching NAR and NARINFO files in this
directory."
(define (build-outputs drv)
(catch 'system-error
(lambda ()
(map (match-lambda
((output-name . output)
output))
(derivation-outputs
(read-derivation-from-file drv))))
(const '())))
(match (zmq-read-message message)
(('build-succeeded ('drv drv) ('url url) _ ...)
(let ((outputs (build-outputs drv)))
(add-to-store outputs url)
(when (%trigger-substitute-url)
(trigger-substitutes-baking outputs (%trigger-substitute-url)))
(log-message "build succeeded: '~a'" drv)
(set-build-successful! drv)))
(('build-failed ('drv drv) ('url url) _ ...)
(log-message "build failed: '~a'" drv)
(db-update-build-status! drv (build-status failed)))))
(define (start-fetch-worker name)
"Start a fetch worker thread with the given NAME. This worker takes care of
downloading build outputs. It communicates with the remote server using a ZMQ
socket."
(call-with-new-thread
(lambda ()
(set-thread-name name)
(let ((socket (zmq-fetch-worker-socket)))
(let loop ()
(match (zmq-get-msg-parts-bytevector socket)
((message)
(run-fetch (bv->string message))))
(loop))))))
;;;
;;; ZMQ connection.
;;;
(define %zmq-context
(zmq-create-context))
(define (zmq-backend-endpoint backend-port)
"Return a ZMQ endpoint string allowing TCP connections on BACKEND-PORT from
all network interfaces."
(string-append "tcp://*:" (number->string backend-port)))
(define (zmq-start-proxy backend-port)
"This procedure starts a proxy between client connections from the IPC
frontend to the workers connected through the TCP backend."
(define (socket-ready? items socket)
(find (lambda (item)
(eq? (poll-item-socket item) socket))
items))
(let* ((build-socket
(zmq-create-socket %zmq-context ZMQ_ROUTER))
(fetch-socket
(zmq-create-socket %zmq-context ZMQ_PUSH))
(poll-items (list
(poll-item build-socket ZMQ_POLLIN))))
(zmq-bind-socket build-socket (zmq-backend-endpoint backend-port))
(zmq-bind-socket fetch-socket (zmq-fetch-workers-endpoint))
;; Do not use the built-in zmq-proxy as we want to edit the envelope of
;; frontend messages before forwarding them to the backend.
(let loop ()
(let ((items (zmq-poll* poll-items 1000)))
(when (zmq-socket-ready? items build-socket)
(match (zmq-get-msg-parts-bytevector build-socket)
((worker empty rest)
(let ((reply-worker
(lambda (message)
(zmq-send-msg-parts-bytevector
build-socket
(list worker
(zmq-empty-delimiter)
(string->bv message))))))
(if (need-fetching? (bv->string rest))
(zmq-send-bytevector fetch-socket rest)
(read-worker-exp (bv->string rest)
#:reply-worker reply-worker))))))
(remove-unresponsive-workers!)
(loop)))))
;;;
;;; Entry point.
;;;
;; The PID of the publish process.
(define %publish-pid
(make-atomic-box #f))
;; The thread running the Avahi publish service.
(define %avahi-thread
(make-atomic-box #f))
(define (signal-handler)
"Catch SIGINT to stop the Avahi event loop and the publish process before
exiting."
(sigaction SIGINT
(lambda (signum)
(let ((publish-pid (atomic-box-ref %publish-pid))
(avahi-thread (atomic-box-ref %avahi-thread)))
(atomic-box-set! %stop-process? #t)
(and publish-pid
(begin
(kill publish-pid SIGHUP)
(waitpid publish-pid)))
(and avahi-thread
(join-thread avahi-thread))
(exit 1)))))
(define (gather-user-privileges user)
"switch to the identity of user, a user name."
(catch 'misc-error
(lambda ()
(let ((user (getpw user)))
(setgroups #())
(setgid (passwd:gid user))
(setuid (passwd:uid user))))
(lambda (key proc message args . rest)
(leave (G_ "user '~a' not found: ~a~%")
user (apply format #f message args)))))
(define (remote-server args)
(signal-handler)
(with-error-handling
(let* ((opts (args-fold* args %options
(lambda (opt name arg result)
(leave (G_ "~A: unrecognized option~%") name))
(lambda (arg result)
(leave (G_ "~A: extraneous argument~%") arg))
%default-options))
(backend-port (assoc-ref opts 'backend-port))
(log-port (assoc-ref opts 'log-port))
(publish-port (assoc-ref opts 'publish-port))
(cache (assoc-ref opts 'cache))
(database (assoc-ref opts 'database))
(trigger-substitute-url (assoc-ref opts 'trigger-substitute-url))
(user (assoc-ref opts 'user))
(public-key
(read-file-sexp
(assoc-ref opts 'public-key-file)))
(private-key
(read-file-sexp
(assoc-ref opts 'private-key-file))))
(parameterize ((%cache-directory cache)
(%trigger-substitute-url trigger-substitute-url)
(%package-database database)
(%public-key public-key)
(%private-key private-key))
(when user
(gather-user-privileges user))
(atomic-box-set!
%publish-pid
(publish-server publish-port
#:public-key public-key
#:private-key private-key))
(atomic-box-set!
%avahi-thread
(avahi-publish-service-thread
service-name
#:type remote-server-service-type
#:port backend-port
#:stop-loop? (lambda ()
(atomic-box-ref %stop-process?))
#:txt (list (string-append "log-port="
(number->string log-port))
(string-append "publish-port="
(number->string publish-port)))))
(receive-logs log-port (%cache-directory))
(with-database
(for-each (lambda (number)
(start-fetch-worker
(string-append "fetch-worker-"
(number->string number))))
(iota 4))
(zmq-start-proxy backend-port))))))

View File

@ -0,0 +1,382 @@
;;; remote-worker.scm -- Remote build worker.
;;; Copyright © 2020 Mathieu Othacehe <othacehe@gnu.org>
;;;
;;; This file is part of Cuirass.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
(define-module (cuirass remote-worker)
#:use-module (cuirass base)
#:use-module (cuirass remote)
#:use-module (gcrypt pk-crypto)
#:use-module (guix avahi)
#:use-module (guix config)
#:use-module (guix derivations)
#:use-module (guix diagnostics)
#:use-module (guix pki)
#:use-module (guix records)
#:use-module (guix scripts)
#:use-module (guix serialization)
#:use-module ((guix store)
#:select (current-build-output-port
store-error?
store-protocol-error?
store-protocol-error-message
with-store))
#:use-module (guix ui)
#:use-module (guix utils)
#:use-module (guix build syscalls)
#:use-module (guix build utils)
#:use-module (guix scripts publish)
#:use-module (simple-zmq)
#:use-module (rnrs bytevectors)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-26)
#:use-module (srfi srfi-34)
#:use-module (srfi srfi-37)
#:use-module (ice-9 atomic)
#:use-module (ice-9 match)
#:use-module (ice-9 threads)
#:export (remote-worker))
;; Indicate if the process has to be stopped.
(define %stop-process?
(make-atomic-box #f))
(define (show-help)
(format #t (G_ "Usage: remote-worker [OPTION]...
Start a remote build worker.\n"))
(display (G_ "
-w, --workers=COUNT start COUNT parallel workers"))
(display (G_ "
-p, --publish-port=PORT publish substitutes on PORT"))
(display (G_ "
-S, --server=SERVER connect to SERVER"))
(display (G_ "
-s, --systems=SYSTEMS list of supported SYSTEMS"))
(display (G_ "
--public-key=FILE use FILE as the public key for signatures"))
(display (G_ "
--private-key=FILE use FILE as the private key for signatures"))
(newline)
(display (G_ "
-h, --help display this help and exit"))
(display (G_ "
-V, --version display version information and exit"))
(newline)
(show-bug-report-information))
(define %options
(list (option '(#\h "help") #f #f
(lambda _
(show-help)
(exit 0)))
(option '(#\V "version") #f #f
(lambda _
(show-version-and-exit "guix publish")))
(option '(#\a "address") #t #f
(lambda (opt name arg result)
(alist-cons 'address arg result)))
(option '(#\w "workers") #t #f
(lambda (opt name arg result)
(alist-cons 'workers (string->number* arg) result)))
(option '(#\p "publish-port") #t #f
(lambda (opt name arg result)
(alist-cons 'publish-port (string->number* arg) result)))
(option '(#\s "server") #t #f
(lambda (opt name arg result)
(alist-cons 'server arg result)))
(option '(#\S "systems") #t #f
(lambda (opt name arg result)
(alist-cons 'systems
(string-split arg #\,) result)))
(option '("public-key") #t #f
(lambda (opt name arg result)
(alist-cons 'public-key-file arg result)))
(option '("private-key") #t #f
(lambda (opt name arg result)
(alist-cons 'private-key-file arg result)))))
(define %default-options
`((workers . 1)
(publish-port . 5558)
(systems . ,(list (%current-system)))
(public-key-file . ,%public-key-file)
(private-key-file . ,%private-key-file)))
;;;
;;; ZMQ connection.
;;;
(define %zmq-context
(zmq-create-context))
(define (zmq-backend-endpoint address port)
"Return a ZMQ endpoint identifying the build server available by TCP at
ADDRESS and PORT."
(string-append "tcp://" address ":" (number->string port)))
(define (zmq-dealer-socket)
"The ZMQ socket to communicate with the worker threads."
(zmq-create-socket %zmq-context ZMQ_DEALER))
;;;
;;; Worker.
;;;
;; The port of the local publish server.
(define %local-publish-port
(make-atomic-box #f))
(define (local-publish-url address)
"Return the URL of the local publish server."
(let ((port (atomic-box-ref %local-publish-port)))
(publish-url address port)))
(define* (run-build drv server
#:key
reply
timeout
max-silent
worker)
"Build DRV and send messages upon build start, failure or completion to the
build server identified by SERVICE-NAME using the REPLY procedure.
The publish server of the build server is added to the list of the store
substitutes-urls. This way derivations that are not present on the worker can
still be substituted."
(with-store store
(let ((address (server-address server))
(log-port (server-log-port server))
(publish-url (server-publish-url server))
(local-publish-url (worker-publish-url worker))
(name (worker-name worker)))
(set-build-options* store publish-url
#:timeout timeout
#:max-silent max-silent)
(reply (zmq-build-started-message drv name))
(guard (c ((store-protocol-error? c)
(info (G_ "Derivation `~a' build failed: ~a~%")
drv (store-protocol-error-message c))
(reply (zmq-build-failed-message drv local-publish-url))))
(let ((result
(let-values (((port finish)
(build-derivations& store (list drv))))
(send-log address log-port drv port)
(close-port port)
(finish))))
(if result
(begin
(info (G_ "Derivation ~a build succeeded.~%") drv)
(reply (zmq-build-succeeded-message drv local-publish-url)))
(begin
(info (G_ "Derivation ~a build failed.~%") drv)
(reply
(zmq-build-failed-message drv local-publish-url)))))))))
(define* (run-command command server
#:key
reply worker)
"Run COMMAND. SERVICE-NAME is the name of the build server that sent the
command. REPLY is a procedure that can be used to reply to this server."
(match (zmq-read-message command)
(('build ('drv drv)
('priority priority)
('timeout timeout)
('max-silent max-silent)
('timestamp timestamp)
('system system))
(info (G_ "Building `~a' derivation.~%") drv)
(run-build drv server
#:reply reply
#:worker worker
#:timeout timeout
#:max-silent max-silent))
(('no-build)
#t)))
(define (worker-ping worker server)
(define (ping socket)
(zmq-send-msg-parts-bytevector
socket
(list (make-bytevector 0)
(string->bv
(zmq-worker-ping (worker->sexp worker))))))
(call-with-new-thread
(lambda ()
(let* ((socket (zmq-dealer-socket))
(address (server-address server))
(port (server-port server))
(endpoint (zmq-backend-endpoint address port)))
(zmq-connect socket endpoint)
(let loop ()
(ping socket)
(sleep 60)
(loop))))))
(define (start-worker worker server)
"Start a worker thread named NAME, reading commands from the DEALER socket
and executing them. The worker can reply on the same socket."
(define (reply socket)
(lambda (message)
(zmq-send-msg-parts-bytevector
socket
(list (zmq-empty-delimiter) (string->bv message)))))
(define (ready socket)
(zmq-send-msg-parts-bytevector
socket
(list (make-bytevector 0)
(string->bv
(zmq-worker-ready-message (worker->sexp worker))))))
(define (request-work socket)
(let ((name (worker-name worker)))
(zmq-send-msg-parts-bytevector
socket
(list (make-bytevector 0)
(string->bv (zmq-worker-request-work-message name))))))
(match (primitive-fork)
(0
(set-thread-name (worker-name worker))
(let* ((socket (zmq-dealer-socket))
(address (server-address server))
(port (server-port server))
(endpoint (zmq-backend-endpoint address port)))
(zmq-connect socket endpoint)
(ready socket)
(worker-ping worker server)
(let loop ()
(request-work socket)
(match (zmq-get-msg-parts-bytevector socket '())
((empty command)
(run-command (bv->string command) server
#:reply (reply socket)
#:worker worker)))
(sleep 10)
(loop))))
(pid pid)))
;;;
;;; Entry point.
;;;
;; The PID of the publish process.
(define %publish-pid
(make-atomic-box #f))
(define %worker-pids
(make-atomic-box '()))
(define (load-server file)
(let ((user-module (make-user-module '((cuirass remote)))))
(load* file user-module)))
(define (add-to-worker-pids! pid)
(let ((pids (atomic-box-ref %worker-pids)))
(atomic-box-set! %worker-pids (cons pid pids))))
(define (signal-handler)
"Catch SIGINT to stop the Avahi event loop and the publish process before
exiting."
(sigaction SIGINT
(lambda (signum)
(let ((publish-pid (atomic-box-ref %publish-pid))
(worker-pids (atomic-box-ref %worker-pids)))
(atomic-box-set! %stop-process? #t)
(for-each (lambda (pid)
(when pid
(kill pid SIGKILL)
(waitpid pid)))
(cons publish-pid worker-pids))
(exit 1)))))
(define (remote-worker args)
(signal-handler)
(with-error-handling
(let* ((opts (args-fold* args %options
(lambda (opt name arg result)
(leave (G_ "~A: unrecognized option~%") name))
(lambda (arg result)
(leave (G_ "~A: extraneous argument~%") arg))
%default-options))
(address (assoc-ref opts 'address))
(workers (assoc-ref opts 'workers))
(publish-port (assoc-ref opts 'publish-port))
(server (assoc-ref opts 'server))
(systems (assoc-ref opts 'systems))
(public-key
(read-file-sexp
(assoc-ref opts 'public-key-file)))
(private-key
(read-file-sexp
(assoc-ref opts 'private-key-file))))
(atomic-box-set! %local-publish-port publish-port)
(atomic-box-set!
%publish-pid
(publish-server publish-port
#:public-key public-key
#:private-key private-key))
(when (and server (not address))
(leave (G_ "Address must be set when server is provided.~%")))
(if server
(let ((server (load-server server)))
(for-each
(lambda (n)
(let ((publish-url (local-publish-url address)))
(add-to-worker-pids!
(start-worker (worker
(address address)
(publish-url publish-url)
(name (generate-worker-name))
(systems systems))
server))))
(iota workers))
(while #t
(sleep 1)))
(avahi-browse-service-thread
(lambda (action service)
(case action
((new-service)
(for-each
(lambda (n)
(let* ((address (or address
(avahi-service-local-address service)))
(publish-url (local-publish-url address)))
(add-to-worker-pids!
(start-worker (worker
(address address)
(publish-url publish-url)
(name (generate-worker-name))
(systems systems))
(avahi-service->server service)))))
(iota workers)))))
#:ignore-local? #f
#:types (list remote-server-service-type)
#:stop-loop? (lambda ()
(atomic-box-ref %stop-process?)))))))

437
src/cuirass/remote.scm Normal file
View File

@ -0,0 +1,437 @@
;;; remote.scm -- Build on remote machines.
;;; Copyright © 2020 Mathieu Othacehe <othacehe@gnu.org>
;;;
;;; This file is part of Cuirass.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
(define-module (cuirass remote)
#:use-module (cuirass logging)
#:use-module (guix avahi)
#:use-module (guix config)
#:use-module (guix derivations)
#:use-module (guix records)
#:use-module (guix store)
#:use-module (guix ui)
#:use-module (guix utils)
#:use-module (guix build download)
#:use-module (guix build syscalls)
#:use-module ((guix build utils) #:select (dump-port mkdir-p))
#:use-module (guix scripts publish)
#:use-module (simple-zmq)
#:use-module (zlib)
#:use-module (rnrs bytevectors)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-26)
#:use-module (ice-9 match)
#:use-module (ice-9 rdelim)
#:use-module (ice-9 threads)
#:export (worker
worker?
worker-address
worker-name
worker-publish-url
worker-systems
worker-last-seen
worker->sexp
sexp->worker
generate-worker-name
%worker-timeout
server
server?
server-address
server-port
server-log-port
server-publish-url
publish-url
avahi-service->server
publish-server
set-build-options*
strip-store-prefix
log-path
receive-logs
send-log
zmq-poll*
zmq-socket-ready?
zmq-empty-delimiter
zmq-build-request-message
zmq-no-build-message
zmq-build-started-message
zmq-build-failed-message
zmq-build-succeeded-message
zmq-worker-ping
zmq-worker-ready-message
zmq-worker-request-work-message
zmq-read-message
remote-server-service-type))
;;;
;;; Workers.
;;;
(define-record-type* <worker>
worker make-worker
worker?
(address worker-address)
(name worker-name)
(publish-url worker-publish-url
(default #f))
(systems worker-systems)
(last-seen worker-last-seen
(default 0)))
(define (worker->sexp worker)
"Return an sexp describing WORKER."
(let ((address (worker-address worker))
(name (worker-name worker))
(systems (worker-systems worker))
(last-seen (worker-last-seen worker)))
`(worker
(address ,address)
(name ,name)
(systems ,systems)
(last-seen ,last-seen))))
(define (sexp->worker sexp)
"Turn SEXP, an sexp as returned by 'worker->sexp', into a <worker> record."
(match sexp
(('worker ('address address)
('name name)
('systems systems)
('last-seen last-seen))
(worker
(address address)
(name name)
(systems systems)
(last-seen last-seen)))))
(define %seed
(seed->random-state
(logxor (getpid) (car (gettimeofday)))))
(define (integer->alphanumeric-char n)
"Map N, an integer in the [0..62] range, to an alphanumeric character."
(cond ((< n 10)
(integer->char (+ (char->integer #\0) n)))
((< n 36)
(integer->char (+ (char->integer #\A) (- n 10))))
((< n 62)
(integer->char (+ (char->integer #\a) (- n 36))))
(else
(error "integer out of bounds" n))))
(define (random-string len)
"Compute a random string of size LEN where each character is alphanumeric."
(let loop ((chars '())
(len len))
(if (zero? len)
(list->string chars)
(let ((n (random 62 %seed)))
(loop (cons (integer->alphanumeric-char n) chars)
(- len 1))))))
(define (generate-worker-name)
"Return the service name of the server."
(string-append (gethostname) "-" (random-string 4)))
(define %worker-timeout
(make-parameter 120))
;;;
;;; Server.
;;;
(define-record-type* <server>
server make-server
server?
(address server-address)
(port server-port)
(log-port server-log-port)
(publish-url server-publish-url))
(define (publish-url address port)
"Return the publish url at ADDRESS and PORT."
(string-append "http://" address ":" (number->string port)))
(define (avahi-service->params service)
"Return the URL of the publish server corresponding to the service with the
given NAME."
(define (service-txt->params txt)
"Parse the service TXT record."
(fold (lambda (param params)
(match (string-split param #\=)
((key value)
(cons (cons (string->symbol key) value)
params))))
'()
txt))
(define (number-param params param)
(string->number (assq-ref params param)))
(let* ((address (avahi-service-address service))
(txt (avahi-service-txt service))
(params (service-txt->params txt))
(log-port (number-param params 'log-port))
(publish-port (number-param params 'publish-port))
(publish-url (publish-url address publish-port)))
`((#:log-port . ,log-port)
(#:publish-url . ,publish-url))))
(define (avahi-service->server service)
(let* ((address (avahi-service-address service))
(port (avahi-service-port service))
(params (avahi-service->params service))
(log-port (assq-ref params #:log-port))
(publish-url (assq-ref params #:publish-url)))
(server
(address address)
(port port)
(log-port log-port)
(publish-url publish-url))))
;;;
;;; Store publishing.
;;;
(define* (set-build-options* store url
#:key
timeout
max-silent)
"Add URL to the list of STORE substitutes-urls."
(set-build-options store
#:use-substitutes? #t
#:fallback? #t
#:keep-going? #t
#:timeout timeout
#:max-silent-time max-silent
#:verbosity 1
#:substitute-urls
(cons url %default-substitute-urls)))
(define* (publish-server port
#:key
public-key
private-key)
"This procedure starts a publishing server listening on PORT in a new
process and returns the pid of the forked process. Use PUBLIC-KEY and
PRIVATE-KEY to sign narinfos."
(match (primitive-fork)
(0
(parameterize ((%public-key public-key)
(%private-key private-key))
(with-store store
(let ((log-file (open-file "/tmp/publish.log" "w")))
(close-fdes 1)
(close-fdes 2)
(dup2 (fileno log-file) 1)
(dup2 (fileno log-file) 2)
(close-port log-file)
(let* ((address (make-socket-address AF_INET INADDR_ANY 0))
(socket-address
(make-socket-address (sockaddr:fam address)
(sockaddr:addr address)
port))
(socket (open-server-socket socket-address)))
(run-publish-server socket store
#:compressions
(list %default-gzip-compression)))))))
(pid pid)))
;;;
;;; Logs.
;;;
(define (strip-store-prefix file)
; Given a file name like "/gnu/store/…-foo-1.2/bin/foo", return
;; "/bin/foo".
(let* ((len (string-length %store-directory))
(base (string-drop file (+ 1 len))))
(match (string-index base #\/)
(#f base)
(index (string-drop base index)))))
(define (log-path cache derivation)
(let* ((store-hash (strip-store-prefix derivation))
(hash (and=> (string-index store-hash #\-)
(cut string-take store-hash <>))))
(string-append cache "/" hash ".log.gz")))
(define (receive-logs port cache)
(define (read-log port)
(match (false-if-exception (read port))
(('log ('version 0)
('derivation derivation))
(let ((file (log-path cache derivation)))
(call-with-output-file file
(lambda (output)
(dump-port port output)))))
(_
(log-message "invalid log received.~%")
#f)))
(define (wait-for-client port proc)
(let ((sock (socket AF_INET SOCK_STREAM 0)))
(setsockopt sock SOL_SOCKET SO_REUSEADDR 1)
(bind sock AF_INET INADDR_ANY port)
(listen sock 1024)
(while #t
(match (select (list sock) '() '() 60)
(((_) () ())
(match (accept sock)
((client . address)
(write '(log-server (version 0)) client)
(force-output client)
(proc client))))
((() () ())
#f)))))
(define (client-handler client)
(call-with-new-thread
(lambda ()
(set-thread-name
(string-append "log-server-"
(number->string (port->fdes client))))
(and=> client read-log)
(when client
(close-port client)))))
(call-with-new-thread
(lambda ()
(set-thread-name "log-server")
(wait-for-client port client-handler))))
(define* (send-log address port derivation log)
(let* ((sock (socket AF_INET SOCK_STREAM 0))
(in-addr (inet-pton AF_INET address))
(addr (make-socket-address AF_INET in-addr port)))
(connect sock addr)
(match (select (list sock) '() '() 10)
(((_) () ())
(match (read sock)
(('log-server ('version version ...))
(let ((header `(log
(version 0)
(derivation ,derivation))))
(write header sock)
(call-with-gzip-output-port sock
(lambda (sock-compressed)
(dump-port log sock-compressed)))
(close-port sock)))
(x
(log-message "invalid handshake ~s.~%" x)
(close-port sock)
#f)))
((() () ()) ;timeout
(log "timeout while sending files to ~a.~%" port)
(close-port sock)
#f))))
;;;
;;; ZMQ.
;;;
(define %zmq-context
(zmq-create-context))
(define (EINTR-safe proc)
"Return a variant of PROC that catches EINTR 'zmq-error' exceptions and
retries a call to PROC."
(define (safe . args)
(catch 'zmq-error
(lambda ()
(apply proc args))
(lambda (key errno . rest)
(if (= errno EINTR)
(apply safe args)
(apply throw key errno rest)))))
safe)
(define zmq-poll*
;; Return a variant of ZMQ-POLL that catches EINTR errors.
(EINTR-safe zmq-poll))
(define (zmq-socket-ready? items socket)
"Return #t if the given SOCKET is part of ITEMS, a list returned by a
'zmq-poll' call, return #f otherwise."
(find (lambda (item)
(eq? (poll-item-socket item) socket))
items))
(define (zmq-read-message msg)
(call-with-input-string msg read))
(define (zmq-empty-delimiter)
"Return an empty ZMQ delimiter used to format message envelopes."
(make-bytevector 0))
;; ZMQ Messages.
(define* (zmq-build-request-message drv
#:key
priority
timeout
max-silent
timestamp
system)
"Return a message requesting the build of DRV for SYSTEM."
(format #f "~s" `(build (drv ,drv)
(priority ,priority)
(timeout ,timeout)
(max-silent ,max-silent)
(timestamp ,timestamp)
(system ,system))))
(define (zmq-no-build-message)
"Return a message that indicates that no builds are available."
(format #f "~s" `(no-build)))
(define (zmq-build-started-message drv worker)
"Return a message that indicates that the build of DRV has started."
(format #f "~s" `(build-started (drv ,drv) (worker ,worker))))
(define* (zmq-build-failed-message drv url #:optional log)
"Return a message that indicates that the build of DRV has failed."
(format #f "~s" `(build-failed (drv ,drv) (url ,url) (log ,log))))
(define* (zmq-build-succeeded-message drv url #:optional log)
"Return a message that indicates that the build of DRV is done."
(format #f "~s" `(build-succeeded (drv ,drv) (url ,url) (log ,log))))
(define (zmq-worker-ping worker)
"Return a message that indicates that WORKER is alive."
(format #f "~s" `(worker-ping ,worker)))
(define (zmq-worker-ready-message worker)
"Return a message that indicates that WORKER is ready."
(format #f "~s" `(worker-ready ,worker)))
(define (zmq-worker-request-work-message name)
"Return a message that indicates that WORKER is requesting work."
(format #f "~s" `(worker-request-work ,name)))
(define remote-server-service-type
"_remote-server._tcp")

View File

@ -34,6 +34,7 @@
#:use-module ((guix utils) #:select (string-replace-substring)) #:use-module ((guix utils) #:select (string-replace-substring))
#:use-module ((cuirass database) #:select (build-status #:use-module ((cuirass database) #:select (build-status
evaluation-status)) evaluation-status))
#:use-module (cuirass remote)
#:export (html-page #:export (html-page
specifications-table specifications-table
evaluation-info-table evaluation-info-table
@ -42,7 +43,8 @@
build-details build-details
evaluation-build-table evaluation-build-table
running-builds-table running-builds-table
global-metrics-content)) global-metrics-content
workers-status))
(define (navigation-items navigation) (define (navigation-items navigation)
(match navigation (match navigation
@ -136,6 +138,9 @@ system whose names start with " (code "guile-") ":" (br)
(a (@ (class "dropdown-item") (a (@ (class "dropdown-item")
(href "/metrics")) (href "/metrics"))
"Global metrics") "Global metrics")
(a (@ (class "dropdown-item")
(href "/workers"))
"Workers status")
(a (@ (class "dropdown-item") (a (@ (class "dropdown-item")
(href "/status")) (href "/status"))
"Running builds"))) "Running builds")))
@ -293,10 +298,8 @@ system whose names start with " (code "guile-") ":" (br)
(time->string (assq-ref build #:stoptime)) (time->string (assq-ref build #:stoptime))
"—"))) "—")))
(tr (th "Log file") (tr (th "Log file")
(td ,(if completed? (td (a (@ (href "/build/" ,(assq-ref build #:id) "/log/raw"))
`(a (@ (href "/build/" ,(assq-ref build #:id) "/log/raw")) "raw")))
"raw")
"—")))
(tr (th "Derivation") (tr (th "Derivation")
(td (pre ,(assq-ref build #:derivation)))) (td (pre ,(assq-ref build #:derivation))))
(tr (th "Outputs") (tr (th "Outputs")
@ -515,10 +518,8 @@ and BUILD-MAX are global minimal and maximal (stoptime, rowid) pairs."
(td ,(assq-ref build #:job)) (td ,(assq-ref build #:job))
(td ,(assq-ref build #:nixname)) (td ,(assq-ref build #:nixname))
(td ,(assq-ref build #:system)) (td ,(assq-ref build #:system))
(td ,(if completed? (td (a (@ (href "/build/" ,(assq-ref build #:id) "/log/raw"))
`(a (@ (href "/build/" ,(assq-ref build #:id) "/log/raw")) "raw"))))
"raw")
"—"))))
(define (build-id build) (define (build-id build)
(match build (match build
@ -810,7 +811,9 @@ and BUILD-MAX are global minimal and maximal row identifiers."
(td ,(assq-ref build #:job-name)) (td ,(assq-ref build #:job-name))
(td ,(time->string (td ,(time->string
(assq-ref build #:starttime))) (assq-ref build #:starttime)))
(td ,(assq-ref build #:system)))) (td ,(assq-ref build #:system))
(td (a (@ (href "/build/" ,(assq-ref build #:id) "/log/raw"))
"raw"))))
`((p (@ (class "lead")) "Running builds") `((p (@ (class "lead")) "Running builds")
(table (table
@ -820,7 +823,8 @@ and BUILD-MAX are global minimal and maximal row identifiers."
`((thead (tr (th (@ (scope "col")) "ID") `((thead (tr (th (@ (scope "col")) "ID")
(th (@ (scope "col")) "Job") (th (@ (scope "col")) "Job")
(th (@ (scope "col")) "Queued at") (th (@ (scope "col")) "Queued at")
(th (@ (scope "col")) "System"))) (th (@ (scope "col")) "System")
(th (@ (scope "col")) "Log")))
(tbody (tbody
,(map build-row builds))))))) ,(map build-row builds)))))))
@ -1013,3 +1017,41 @@ completed builds divided by the time required to build them.")
#:title "Pending builds" #:title "Pending builds"
#:labels '("Pending builds") #:labels '("Pending builds")
#:colors (list "#3e95cd"))))) #:colors (list "#3e95cd")))))
(define (workers-status workers builds)
(define (build-row build)
`(tr
(th (@ (scope "row"))
(a (@ (href "/build/" ,(assq-ref build #:id) "/details"))
,(assq-ref build #:id)))
(td ,(assq-ref build #:job-name))
(td ,(time->string
(assq-ref build #:starttime)))
(td ,(assq-ref build #:system))
(td (a (@ (href "/build/" ,(assq-ref build #:id) "/log/raw"))
"raw"))))
(define (worker-header worker)
`((p ,(integer->char 128994)
" "
(b ,(worker-name worker))
,(format #f " (~a, ~{~a ~})"
(worker-address worker)
(worker-systems worker)))))
(define (worker-table worker builds)
`(,@(worker-header worker)
(table
(@ (class "table table-sm table-hover table-striped"))
,@(if (null? builds)
`((th (@ (scope "col")) "Idle"))
`((thead (tr (th (@ (scope "col")) "ID")
(th (@ (scope "col")) "Job")
(th (@ (scope "col")) "Queued at")
(th (@ (scope "col")) "System")
(th (@ (scope "col")) "Log")))
(tbody
,(map build-row builds)))))))
`((p (@ (class "lead")) "Workers status")
,@(map worker-table workers builds)))

View File

@ -7,8 +7,9 @@ CREATE TABLE Specifications (
proc_input TEXT NOT NULL, -- name of the input containing the proc that does the evaluation proc_input TEXT NOT NULL, -- name of the input containing the proc that does the evaluation
proc_file TEXT NOT NULL, -- file containing the procedure that does the evaluation, relative to proc_input proc_file TEXT NOT NULL, -- file containing the procedure that does the evaluation, relative to proc_input
proc TEXT NOT NULL, -- defined in proc_file proc TEXT NOT NULL, -- defined in proc_file
proc_args TEXT NOT NULL, -- passed to proc proc_args TEXT NOT NULL, -- passed to proc
build_outputs TEXT NOT NULL --specify what build outputs should be made available for download build_outputs TEXT NOT NULL, --specify what build outputs should be made available for download
priority INTEGER NOT NULL DEFAULT 0
); );
CREATE TABLE Inputs ( CREATE TABLE Inputs (
@ -61,10 +62,13 @@ CREATE TABLE Builds (
evaluation INTEGER NOT NULL, evaluation INTEGER NOT NULL,
job_name TEXT NOT NULL, job_name TEXT NOT NULL,
system TEXT NOT NULL, system TEXT NOT NULL,
machine TEXT, --optional, machine performing the build. worker TEXT, --optional, worker performing the build.
nix_name TEXT NOT NULL, nix_name TEXT NOT NULL,
log TEXT NOT NULL, log TEXT NOT NULL,
status INTEGER NOT NULL, status INTEGER NOT NULL,
priority INTEGER NOT NULL DEFAULT 0,
max_silent INTEGER NOT NULL DEFAULT 0,
timeout INTEGER NOT NULL DEFAULT 0,
timestamp INTEGER NOT NULL, timestamp INTEGER NOT NULL,
starttime INTEGER NOT NULL, starttime INTEGER NOT NULL,
stoptime INTEGER NOT NULL, stoptime INTEGER NOT NULL,
@ -96,6 +100,13 @@ CREATE TABLE Events (
event_json TEXT NOT NULL event_json TEXT NOT NULL
); );
CREATE TABLE Workers (
name TEXT NOT NULL PRIMARY KEY,
address TEXT NOT NULL,
systems TEXT NOT NULL,
last_seen INTEGER NOT NULL
);
-- XXX: All queries targeting Builds and Outputs tables *must* be covered by -- XXX: All queries targeting Builds and Outputs tables *must* be covered by
-- an index. It is also preferable for the other tables. -- an index. It is also preferable for the other tables.
CREATE INDEX Builds_status_index ON Builds (status); CREATE INDEX Builds_status_index ON Builds (status);
@ -106,6 +117,7 @@ CREATE INDEX Builds_timestamp_stoptime on Builds(timestamp, stoptime);
CREATE INDEX Builds_stoptime on Builds(stoptime DESC); CREATE INDEX Builds_stoptime on Builds(stoptime DESC);
CREATE INDEX Builds_stoptime_id on Builds(stoptime DESC, id DESC); CREATE INDEX Builds_stoptime_id on Builds(stoptime DESC, id DESC);
CREATE INDEX Builds_status_ts_id on Builds(status DESC, timestamp DESC, id ASC); CREATE INDEX Builds_status_ts_id on Builds(status DESC, timestamp DESC, id ASC);
CREATE INDEX Builds_priority_timestamp on Builds(priority DESC, timestamp ASC);
CREATE INDEX Evaluations_status_index ON Evaluations (id, status); CREATE INDEX Evaluations_status_index ON Evaluations (id, status);
CREATE INDEX Evaluations_specification_index ON Evaluations (specification, id DESC); CREATE INDEX Evaluations_specification_index ON Evaluations (specification, id DESC);

View File

@ -1,5 +1,5 @@
BEGIN TRANSACTION; BEGIN TRANSACTION;
ALTER TABLE Builds ADD machine TEXT DEFAULT NULL; ALTER TABLE Builds ADD worker TEXT DEFAULT NULL;
COMMIT; COMMIT;

10
src/sql/upgrade-18.sql Normal file
View File

@ -0,0 +1,10 @@
BEGIN TRANSACTION;
CREATE TABLE Workers (
name TEXT NOT NULL PRIMARY KEY,
address TEXT NOT NULL,
systems TEXT NOT NULL,
last_seen INTEGER NOT NULL
);
COMMIT;

11
src/sql/upgrade-19.sql Normal file
View File

@ -0,0 +1,11 @@
BEGIN TRANSACTION;
ALTER TABLE Specifications ADD priority INTEGER NOT NULL DEFAULT 0;
ALTER TABLE Builds ADD priority INTEGER NOT NULL DEFAULT 0;
ALTER TABLE Builds ADD max_silent INTEGER NOT NULL DEFAULT 0;
ALTER TABLE Builds ADD timeout INTEGER NOT NULL DEFAULT 0;
CREATE INDEX Builds_priority_timestamp on Builds(priority DESC, timestamp ASC);
COMMIT;

View File

@ -47,7 +47,8 @@
(#:tag . #f) (#:tag . #f)
(#:commit . #f) (#:commit . #f)
(#:no-compile? . #f)))) (#:no-compile? . #f))))
(#:build-outputs . ()))) (#:build-outputs . ())
(#:priority . 9)))
(define (make-dummy-checkouts fakesha1 fakesha2) (define (make-dummy-checkouts fakesha1 fakesha2)
`(((#:commit . ,fakesha1) `(((#:commit . ,fakesha1)

View File

@ -218,12 +218,6 @@
(object->json-string build-query-result) (object->json-string build-query-result)
json->scm))) json->scm)))
(test-equal "/build/1/log/raw"
`(302 ,(string->uri-reference "/log/fake-1.0"))
(let ((response (http-get (test-cuirass-uri "/build/1/log/raw"))))
(list (response-code response)
(response-location response))))
(test-equal "/build/42" (test-equal "/build/42"
404 404
(response-code (http-get (test-cuirass-uri "/build/42")))) (response-code (http-get (test-cuirass-uri "/build/42"))))