maintenance/hydra/deploy-node-129.scm

433 lines
16 KiB
Scheme
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

;;; Commentary:
;;;
;;; This machine uses a server identical to Berlin (a Dell PowerEdge
;;; R430 server) and serves the following purposes:
;;;
;;; 1. act as a regular Berlin build machine
;;;
;;; 2. be available as a fall-back to Berlin in case of problems
;;;
;;; 3. offer an rsync service to mirror Berlin's binary substitutes
;;;
;;; 4. be used as a test bed for staging new configurations before
;;; they are deployed in production to Berlin
;;;
;;; 5. store and serve nars for bordeaux.guix.gnu.org
;;;
;;; TODO: Implement service redundancy with Berlin.
;;;
;;; FIXME: Re-deploying must happen from Berlin, due to bug #46760.
;;; FIXME: Must use reconfigure instead of deploy due to bug #63559.
;;;
;;; To update its operating system, make sure you are a sysadmin
;;; defined in the 'berlin-new-build-machine-os' procedure in
;;; (sysadmin build-machines). You can request another current
;;; sysadmin to commit the change and re-deploy it. Then you should
;;; be able to do the following, from your personal 'maintenance'
;;; checkout on Berlin:
;;;
;;; $ guix deploy -L modules deploy-node-129.scm
;;;
(use-modules (gnu bootloader)
(gnu bootloader grub)
(gnu packages linux)
(gnu packages certs)
(gnu services base)
(gnu services mcron)
(gnu services rsync)
(gnu services guix)
(gnu services ssh)
(gnu services web)
(gnu services certbot)
(gnu system file-systems)
(gnu system linux-initrd)
(guix gexp)
(sysadmin build-machines))
;; Address of this machine.  It is used as the SSH host name in the
;; machine declaration at the end of this file, and is passed to
;; 'childhurd-ip?' when the operating system is built.
(define %ip-address "141.80.167.186")
;;; XXX: Duplicated from berlin-nodes.scm.
(define %authorized-guix-keys
  ;; The 'guix archive' keys authorized on this machine; currently only
  ;; the Berlin export key.
  (let ((berlin-export-key
         (local-file "keys/guix/berlin.guixsd.org-export.pub")))
    (list berlin-export-key)))
;; Nginx server blocks for hydra-guix-129.guix.gnu.org: one listening on
;; port 80 and one on port 443 with TLS.  Both share COMMON-LOCATIONS;
;; the port 80 block additionally exposes /.well-known for Certbot.
(define %nginx-server-blocks
;; Locations shared by the HTTP and HTTPS server blocks below.
(let ((common-locations
(list
;; /nix-cache-info is answered directly with a static 200 response.
(nginx-location-configuration
(uri "= /nix-cache-info")
(body '("
return 200 'StoreDir: /gnu/store\nWantMassQuery: 0\nPriority: 100\n';
add_header Content-Type text/plain;")))
;; Narinfo requests are forwarded to the "nar-herder" upstream.
(nginx-location-configuration
(uri "~ \\.narinfo$")
(body '("
proxy_pass http://nar-herder;
# For HTTP pipelining. This has a dramatic impact on performance.
client_body_buffer_size 128k;
# Narinfos requests are short, serve many of them on a connection.
keepalive_requests 20000;
")))
;; The next three locations are plain pass-throughs to "nar-herder".
(nginx-location-configuration
(uri "~ \\.narinfo/info$")
(body '("proxy_pass http://nar-herder;")))
(nginx-location-configuration
(uri "/nar/")
(body '("proxy_pass http://nar-herder;")))
(nginx-location-configuration
(uri "/file/")
(body '("proxy_pass http://nar-herder;")))
;; Named location referenced as '@nar-storage-location' by the
;; 'try_files' directive of the /internal/nar/ location further down.
;; It strips the /internal/ prefix, forwards the request over HTTPS to
;; the "nar-storage" upstream, and appends this host to the Via header.
(nginx-named-location-configuration
(name "nar-storage-location")
(body '("rewrite /internal/(.*) /$1 break;"
"proxy_pass https://nar-storage;"
"
set $via \"1.1 hydra-guix-129\";
if ($http_via) {
set $via \"$http_via, $via\";
}
proxy_set_header Via $via;"
"proxy_set_header Host bordeaux.guix.gnu.org:443;")))
;; Exact-match endpoints forwarded to "nar-herder".
(nginx-location-configuration
(uri "= /latest-database-dump")
(body '("proxy_pass http://nar-herder;")))
(nginx-location-configuration
(uri "= /recent-changes")
(body '("proxy_pass http://nar-herder;")))
(nginx-location-configuration
(uri "= /metrics")
(body '("proxy_pass http://nar-herder;")))
;; Internal-only location: serve nars from /srv/bordeaux/nars, falling
;; back to @nar-storage-location when the file is not found there.
(nginx-location-configuration
(uri "~ ^/internal/nar/(.*)$")
(body '("
internal;
root /srv/bordeaux/nars;
try_files /nar/$1 @nar-storage-location;
error_page 404 /404;
client_body_buffer_size 256k;
# Nars are already compressed.
gzip off;
")))
;; Internal-only location for nars under /var/cache/nar-herder; there is
;; no upstream fallback here, a missing file yields 404.
(nginx-location-configuration
(uri "~ ^/internal/cached-nar/(.*)$")
(body '("
internal;
root /var/cache/nar-herder;
try_files /nar/$1 =404;
error_page 404 /404;
client_body_buffer_size 256k;
gzip off;
")))
;; Internal-only access to files under /var/lib/nar-herder.
(nginx-location-configuration
(uri "~ ^/internal/database/(.*)$")
(body '("internal;"
"alias /var/lib/nar-herder/$1;"))))))
(list
;; Plain HTTP server.
(nginx-server-configuration
(server-name '("hydra-guix-129.guix.gnu.org"))
(listen '("80"))
(root (local-file "nginx/html/hydra-guix-129" #:recursive? #t))
(locations
(append
common-locations
(list
(nginx-location-configuration ; For use by Certbot
(uri "/.well-known")
(body '(("root /var/www;"))))))))
;; HTTPS server for the same host name, using the certificate files
;; under /etc/letsencrypt/live.
(nginx-server-configuration
(server-name '("hydra-guix-129.guix.gnu.org"))
(listen '("443 ssl"))
(root (local-file "nginx/html/hydra-guix-129" #:recursive? #t))
(ssl-certificate
"/etc/letsencrypt/live/hydra-guix-129.guix.gnu.org/fullchain.pem")
(ssl-certificate-key
"/etc/letsencrypt/live/hydra-guix-129.guix.gnu.org/privkey.pem")
;; NOTE(review): the 'ssl_protocols' line below still enables TLSv1.1,
;; which RFC 8996 deprecates; consider restricting it to TLSv1.2 and
;; TLSv1.3 after confirming no client depends on it.
(raw-content
'("
# Make sure SSL is disabled.
ssl_protocols TLSv1.1 TLSv1.2 TLSv1.3;
# Disable weak cipher suites.
ssl_ciphers HIGH:!aNULL:!MD5;
ssl_prefer_server_ciphers on;"))
(locations common-locations)))))
;; UUIDs of the two Btrfs file systems referenced by the file system
;; declarations in this file.
(define %btrfs-san-uuid "3bd8e3fb-2ad1-41ff-ac80-4ca3e17950ea")
(define %btrfs-ssd-uuid "43c70fb3-c587-49b7-9c8d-23356d656fae")

;; Mount options shared by every Btrfs file system declared with the
;; helpers in this file, as an alist of (NAME . VALUE) string pairs.
(define %common-btrfs-options
  (list (cons "compress" "zstd")
        (cons "space_cache" "v2")))
(define %btrfs-pool-san
  ;; Mount subvolume ID 5 of the Btrfs file system identified by
  ;; %btrfs-san-uuid at /mnt/btrfs-pool-san, with the common options.
  (file-system
    (type "btrfs")
    (device (uuid %btrfs-san-uuid))
    (mount-point "/mnt/btrfs-pool-san")
    (create-mount-point? #t)
    (options (alist->file-system-options
              (acons "subvolid" "5" %common-btrfs-options)))))
(define %btrfs-pool-ssd
  ;; Mount subvolume ID 5 of the Btrfs file system identified by
  ;; %btrfs-ssd-uuid at /mnt/btrfs-pool-ssd, with the common options.
  (file-system
    (type "btrfs")
    (device (uuid %btrfs-ssd-uuid))
    (mount-point "/mnt/btrfs-pool-ssd")
    (create-mount-point? #t)
    (options (alist->file-system-options
              (acons "subvolid" "5" %common-btrfs-options)))))
(define* (btrfs-subvolume-mount name mount-point
                                #:key (device-uuid %btrfs-san-uuid))
  "Return a file system declaration that mounts the Btrfs subvolume NAME
at MOUNT-POINT, creating the mount point if needed.  The file system is
looked up by DEVICE-UUID, which defaults to %BTRFS-SAN-UUID; pass another
UUID to use a different Btrfs file system."
  (let ((mount-options (acons "subvol" name %common-btrfs-options)))
    (file-system
      (device (uuid device-uuid))
      (mount-point mount-point)
      (create-mount-point? #t)
      (type "btrfs")
      (options (alist->file-system-options mount-options)))))
(define btrfs-balance-job
  ;; mcron job invoking 'btrfs balance start -dusage=5 /': chunks using
  ;; less than 5% of their space get re-allocated so that Btrfs regains
  ;; 'unallocated' space.  The threshold is deliberately low (5%) to
  ;; limit wear on the SSD.  Scheduled at 5 AM, every 3 days.
  (let ((btrfs (file-append btrfs-progs "/bin/btrfs")))
    #~(job '(next-hour-from (next-day (range 1 31 3)) '(5))
           (lambda ()
             (system* #$btrfs "balance" "start" "-dusage=5" "/"))
           "btrfs-balance")))
;; Contents of the "multipath.conf" file that the operating system below
;; installs through 'etc-service-type'.  The configuration blacklists
;; every device, then whitelists and configures only the COMPELNT
;; "Compellent Vol" volumes.
(define %multipath.conf
(plain-file "multipath.conf"
"\
defaults {
user_friendly_names \"yes\"
find_multipaths \"yes\"
}
blacklist {
devnode \"!^(sd[a-z]|dasd[a-z]|nvme[0-9])\"
device {
vendor \".*\"
product \".*\"
}
}
# allow only Dell Compelent volumes
blacklist_exceptions {
device {
vendor \"COMPELNT\"
product \"Compellent Vol\"
}
}
devices {
device {
vendor \"COMPELNT\"
product \"Compellent Vol\"
path_grouping_policy \"group_by_prio\"
failback \"immediate\"
no_path_retry \"queue\"
}
}
"))
(define %copy-kernel-and-initrd
  ;; GRUB cannot see the storage device that holds the root file system,
  ;; so the kernel and initrd directories are mirrored under
  ;; /boot/@root/, where GRUB is able to read them.
  (with-imported-modules '((guix build utils))
    #~(begin
        (use-modules (guix build utils))
        ;; /run/current-system/kernel is a profile; resolving the files
        ;; inside it and taking their directory yields the actual
        ;; directory names that 'grub.cfg' refers to.
        (let ((directories
               (list (dirname
                      (canonicalize-path
                       "/run/current-system/kernel/bzImage"))
                     (dirname
                      (canonicalize-path "/run/current-system/initrd")))))
          (for-each
           (lambda (directory)
             (let ((destination (string-append "/boot/@root/" directory)))
               (format #t "copying '~a' to /boot/@root/~%" directory)
               (mkdir-p (dirname destination))
               (copy-recursively directory destination)))
           directories)))))
;; Operating system declaration for this node.  It starts from the OS
;; returned by 'berlin-new-build-machine-os' for node 129 and overrides
;; the initrd modules, bootloader targets, packages, file systems,
;; services and swap devices.
(define node-129-os
(let ((base-os (berlin-new-build-machine-os
129
#:authorized-guix-keys %authorized-guix-keys
#:emulated-architectures '("ppc64le")
#:childhurd? (childhurd-ip? %ip-address)
#:systems '("x86_64-linux" "i686-linux")
#:max-jobs 4
#:max-cores 24)))
(operating-system
(inherit base-os)
;; Extra kernel modules for the initrd, added in front of the base set.
(initrd-modules (append (list "megaraid_sas" "scsi_transport_sas"
"mpt3sas" "libsas"
;; Suggested by 'guix system init' for
;; the SAN storage.
"qla2xxx")
%base-initrd-modules))
;; Keep the bootloader of the base OS, but install it to both EFI
;; partitions (mounted at /boot/efi and /boot/efi2 below).
(bootloader
(bootloader-configuration
(inherit (operating-system-bootloader base-os)) ;efi bootloader
(targets (list "/boot/efi" "/boot/efi2"))))
(packages
(cons* multipath-tools
;; Needed for the nar-herder
nss-certs
(operating-system-packages base-os)))
;; File systems: /boot and the two EFI partitions, then the Btrfs
;; subvolumes; subvolumes use the SAN file system unless #:device-uuid
;; says otherwise.
(file-systems (cons*
(file-system
(mount-point "/boot") ;/dev/sda3 and /dev/sdb3 in Btrfs RAID 1
(device (uuid "f4ec81a5-3ea6-494b-8886-b71eec6721ee"))
(type "btrfs")
(options "compress=zstd"))
(file-system
(mount-point "/boot/efi")
(device (uuid "FC8E-0264" 'fat)) ;/dev/sda2
(type "vfat"))
(file-system
(mount-point "/boot/efi2")
(device (uuid "FCDB-FA3A" 'fat)) ;/dev/sdb2
(type "vfat"))
%btrfs-pool-san ;for convenience
(btrfs-subvolume-mount "@root" "/")
(btrfs-subvolume-mount "@home" "/home")
(btrfs-subvolume-mount "@cache" "/var/cache")
%btrfs-pool-ssd
;; The renameat2 trick only works on the parent
;; of an active mount point, not on the mount
;; point itself, so introduce a 'publish' parent
;; directory.
(btrfs-subvolume-mount "@publish-mirror"
"/srv/publish/substitutes")
;; This subvolume is taken from the SSD file system instead of the
;; default SAN one.
(btrfs-subvolume-mount "@bordeaux-nars" "/srv/bordeaux/nars"
#:device-uuid %btrfs-ssd-uuid)
%base-file-systems))
(services
;; Run %copy-kernel-and-initrd as part of system activation.
(cons* (simple-service 'copy-kernel+initrd-to-/boot
activation-service-type
%copy-kernel-and-initrd)
;; Install %multipath.conf as "multipath.conf" via 'etc-service-type'.
(simple-service 'etc-multipath.conf
etc-service-type
(list `("multipath.conf" ,%multipath.conf)))
;; Export /srv/publish/substitutes as the "substitutes" rsync module.
(service rsync-service-type
(rsync-configuration
(modules
(list (rsync-module
(name "substitutes")
(file-name "/srv/publish/substitutes"))))))
;; nar-herder mirroring bordeaux.guix.gnu.org, with /srv/bordeaux/nars
;; as its storage directory.
(service nar-herder-service-type
(nar-herder-configuration
(mirror "https://bordeaux.guix.gnu.org")
(storage "/srv/bordeaux/nars")
(ttl "180d")
(log-level 'DEBUG)))
;; Certificate for hydra-guix-129.guix.gnu.org, using /var/www as the
;; webroot (served under /.well-known by the port 80 nginx block).
(service certbot-service-type
(certbot-configuration
(certificates
(list (certificate-configuration
(domains '("hydra-guix-129.guix.gnu.org")))))
;; TODO: Maybe this should be guix-sysadmin@gnu.org
(email "mail@cbaines.net")
(webroot "/var/www")))
;; nginx with the two upstreams referenced by %nginx-server-blocks:
;; "nar-herder" (127.0.0.1:8734) and "nar-storage"
;; (bordeaux.guix.gnu.org:443).
(service nginx-service-type
(nginx-configuration
(upstream-blocks
(list (nginx-upstream-configuration
(name "nar-herder")
(servers '("127.0.0.1:8734")))
(nginx-upstream-configuration
(name "nar-storage")
(servers '("bordeaux.guix.gnu.org:443")))))
(server-blocks
%nginx-server-blocks)))
;; Adjust some of the services inherited from the base OS.
(modify-services (operating-system-user-services base-os)
;; Add the Btrfs balance job in front of the existing mcron jobs.
(mcron-service-type
config => (mcron-configuration
(inherit config)
(jobs (cons btrfs-balance-job
(mcron-configuration-jobs config)))))
;; Prepend an extra static networking declaration (two addresses and a
;; default route) to the inherited ones.
(static-networking-service-type
networks =>
(cons (static-networking
(addresses (list
;; This is a publicly accessible IP, to
;; allow accessing the Guix MDC network
;; via this machine when Berlin is down.
(network-address
(device "eno2")
(value "141.80.181.41/24"))
;; This gives the machine access to the
;; iDRAC network, so that it can access
;; Berlin's iDRAC for example.
(network-address
(device "eno4")
(value "141.80.167.251/26"))))
(routes (list (network-route
(destination "default")
(gateway "141.80.181.1"))))
(provision '(backdoor))) ;required else car error
networks))
(openssh-service-type
config => (openssh-configuration
(inherit config)
;; Only accept public key authentication for
;; enhanced security.
(password-authentication? #f)
;; Connecting as root is only allowed from
;; Berlin, and makes it possible to sync
;; subvolumes via Btrfs send/receive.
(authorized-keys
(cons
`("root"
,(local-file "keys/ssh/berlin.guixsd.org.pub"))
(openssh-configuration-authorized-keys config)))
;; The default route configured on Berlin
;; means it's seen as 141.80.181.40, not
;; 141.80.167.131 to hydra-guix-129.
(extra-content
(string-append
(openssh-configuration-extra-content config)
"
Match Address 141.80.181.40
PermitRootLogin yes\n")))))))
(swap-devices '())))) ;cannot do swap on Btrfs RAID
;; Value of this file: the list of machines to deploy.  It contains a
;; single machine running NODE-129-OS, reached over SSH at %IP-ADDRESS
;; as the user named by the USER environment variable.
(let ((ssh-configuration
       (machine-ssh-configuration
        (host-name %ip-address)
        (user (getenv "USER"))
        (build-locally? #t)
        (host-key "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMuCdrMoF25T9ejPLAAcS92b6lVIz5+U0avyYPQTG5NI")
        (system "x86_64-linux"))))
  (list
   (machine
    (operating-system node-129-os)
    (environment managed-host-environment-type)
    (configuration ssh-configuration))))