2
0
Fork 0
mirror of git://git.savannah.gnu.org/guix/maintenance.git synced 2023-12-14 03:33:04 +01:00
maintenance/hydra/berlin.scm
Maxim Cournoyer 223fa5351c
berlin: Configure a full garbage collection run daily.
* hydra/berlin.scm (services): Set the gc-threshold argument of the
frontend-services procedure to #f.
2023-01-18 16:30:19 -05:00

550 lines
22 KiB
Scheme
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

;; OS configuration for "berlin", the frontend of the compile farm
;; hosted at the MDC.
;; Copyright © 2016-2022 Ludovic Courtès <ludo@gnu.org>
;; Copyright © 2017, 2018, 2019, 2020, 2021, 2022 Ricardo Wurmus <rekado@elephly.net>
;; Copyright © 2019, 2021 Julien Lepiller <julien@lepiller.eu>
;; Copyright © 2020, 2021 Florian Pelz <pelzflorian@pelzflorian.de>
;; Copyright © 2020, 2021 Mathieu Othacehe <othacehe@gnu.org>
;; Copyright © 2021 Tobias Geerinckx-Rice <me@tobias.gr>
;; Copyright © 2022 Maxim Cournoyer <maxim.cournoyer@gmail.com>
;; Released under the GNU GPLv3 or any later version.
(use-modules (gnu) (guix) (sysadmin services) (sysadmin people) (sysadmin dns)
(sysadmin web)
(guix git-download)
((guix utils) #:select (current-source-directory))
((guix build utils) #:select (find-files))
(srfi srfi-1)
(ice-9 match))
(use-service-modules avahi base databases dns mcron monitoring networking admin
rsync shepherd ssh vpn web)
(use-package-modules admin base certs databases disk emacs linux mail monitoring
screen ssh tls tor vim package-management
version-control
web wget ci rsync
guile-xyz)
(define %sysadmins
  ;; People with administrative access to berlin.  Each entry pairs a login
  ;; name and a full name with the SSH public key kept under "keys/ssh/".
  ;; NOTE(review): the key file names are intentionally left as literal
  ;; strings; 'local-file' presumably resolves literal relative names against
  ;; this file's directory -- confirm in the Guix manual before computing them.
  (list
   (sysadmin
    (name "ludo")
    (full-name "Ludovic Courtès")
    (ssh-public-key (local-file "keys/ssh/ludo.pub")))
   (sysadmin
    (name "rekado")
    (full-name "Ricardo Wurmus")
    (ssh-public-key (local-file "keys/ssh/rekado.pub")))
   (sysadmin
    (name "andreas")
    (full-name "Andreas Enge")
    (ssh-public-key (local-file "keys/ssh/andreas.pub")))
   (sysadmin
    (name "mbakke")
    (full-name "Marius Bakke")
    (ssh-public-key (local-file "keys/ssh/mbakke.pub")))
   (sysadmin
    (name "nckx")
    (full-name "Tobias Geerinckx-Rice")
    (ssh-public-key (local-file "keys/ssh/nckx.pub")))
   (sysadmin
    (name "mathieu")
    (full-name "Mathieu Othacehe")
    (ssh-public-key (local-file "keys/ssh/mathieu.pub")))
   (sysadmin
    (name "pimi")
    (full-name "Mădălin Patrascu")
    (ssh-public-key (local-file "keys/ssh/pimi.pub")))
   (sysadmin
    (name "janneke")
    (full-name "Jan (janneke) Nieuwenhuizen")
    (ssh-public-key (local-file "keys/ssh/janneke.pub")))
   (sysadmin
    (name "cbaines")
    (full-name "Christopher Baines")
    (ssh-public-key (local-file "keys/ssh/cbaines.pub")))
   (sysadmin
    (name "lfam")
    (full-name "Leo Famulari")
    (ssh-public-key (local-file "keys/ssh/lfam.pub")))
   (sysadmin
    (name "maxim")
    (full-name "Maxim Cournoyer")
    (ssh-public-key (local-file "keys/ssh/maxim.pub")))))
;; Splice in the companion source files that live next to this one.
;; Presumably they provide the nginx and web site definitions referenced
;; further down (e.g. '%nginx-configuration', '%zabbix-nginx-server',
;; 'website-services') -- TODO confirm against those files.
(include "nginx/berlin.scm")
(include "website.scm")
;;;
;;; Operating system.
;;;
(define %motd
;; Message of the day: a plain text file named "motd" holding a banner and
;; the sysadmin best practices.  It is handed to 'frontend-services' through
;; its #:motd argument in the operating system declaration.  The text below
;; is emitted verbatim, so do not re-indent or reflow it.
(plain-file "motd"
"\
░░░ ░░░
░░▒▒░░░░░░░░░ ░░░░░░░░░▒▒░░
░░▒▒▒▒▒░░░░░░░ ░░░░░░░▒▒▒▒▒░
░▒▒▒░░▒▒▒▒▒ ░░░░░░░▒▒░
░▒▒▒▒░ ░░░░░░
▒▒▒▒▒ ░░░░░░
▒▒▒▒▒ ░░░░░
░▒▒▒▒▒ ░░░░░ Welcome to berlin!
▒▒▒▒▒ ░░░░░
▒▒▒▒▒ ░░░░░
░▒▒▒▒▒░░░░░
▒▒▒▒▒▒░░░
▒▒▒▒▒▒░
Best practices:
1. Store everything in guix-maintenance.git.
2. To reconfigure, use the latest Guix available and your personal,
up-to-date copy of guix-maintenance, e.g. '~/src/guix-maintenance'.
Use 'sudo' to reconfigure to leave traces.
3. Notify guix-sysadmin@gnu.org when reconfiguring.
4. Notify guix-sysadmin@gnu.org when something goes wrong.
5. Notify ricardo.wurmus@mdc-berlin.de or rekado@elephly.net when the
machine doesn't respond. Only Ricardo has access to the serial console
to reset the machine.
Happy hacking!\n"))
(define %multipath.conf
;; Contents of the multipath configuration file.  It is installed under the
;; name "multipath.conf" through 'etc-service-type' in the operating system
;; declaration.  The configuration blacklists every device and then makes an
;; exception for the COMPELNT "Compellent Vol" volumes, for which it also
;; sets path grouping, failback and retry policies.
;; NOTE(review): the '#' comment inside the string spells "Compelent"; it is
;; part of the generated file and is left untouched here.
(plain-file "multipath.conf"
"\
defaults {
user_friendly_names \"yes\"
find_multipaths \"yes\"
}
blacklist {
devnode \"!^(sd[a-z]|dasd[a-z]|nvme[0-9])\"
device {
vendor \".*\"
product \".*\"
}
}
# allow only Dell Compelent volumes
blacklist_exceptions {
device {
vendor \"COMPELNT\"
product \"Compellent Vol\"
}
}
devices {
device {
vendor \"COMPELNT\"
product \"Compellent Vol\"
path_grouping_policy \"group_by_prio\"
failback \"immediate\"
no_path_retry \"queue\"
}
}
"))
(define %copy-kernel-and-initrd
  ;; GRUB cannot see the storage device that holds the root file system, so
  ;; this snippet mirrors the kernel and initrd directories under
  ;; /boot/@root/, where GRUB is able to find them.  It is registered as an
  ;; activation snippet in the operating system declaration.
  (with-imported-modules '((guix build utils))
    #~(begin
        (use-modules (guix build utils))
        ;; /run/current-system/kernel is a profile.  Canonicalizing a file
        ;; inside it and taking its directory name yields the actual
        ;; directory, which is what 'grub.cfg' refers to.
        (let ((sources
               (list (dirname
                      (canonicalize-path
                       "/run/current-system/kernel/bzImage"))
                     (dirname
                      (canonicalize-path "/run/current-system/initrd")))))
          (for-each
           (lambda (source)
             ;; Recreate SOURCE's full name below /boot/@root/.
             (let ((destination (string-append "/boot/@root/" source)))
               (format #t "copying '~a' to /boot/@root/~%" source)
               (mkdir-p (dirname destination))
               (copy-recursively source destination)))
           sources)))))
(define %build-node-key-directory
  ;; Name of the directory holding the signing keys of the build nodes,
  ;; computed relative to the directory of this source file.
  (let ((here (current-source-directory)))
    (string-append here "/keys/guix/berlin")))
(define %build-node-keys
  ;; Signing keys of the build nodes: one 'local-file' per "*.pub" file found
  ;; under %build-node-key-directory.  The signing key of the head node
  ;; should be among them so that it can use cached substitutes that no
  ;; longer exist in its store.
  (let ((colon-free-name
         ;; Base name of FILE with every colon replaced by a dash.
         (lambda (file)
           (string-map (lambda (chr)
                         (if (char=? chr #\:) #\- chr))
                       (basename file)))))
    (map (lambda (key)
           (local-file key (colon-free-name key)))
         (find-files %build-node-key-directory "\\.pub$"))))
;;;
;;; Backups.
;;;
(define %rsync-modules-for-backup
  ;; Directories exported over rsync so they can be backed up on another
  ;; machine on the project's VPN.  Each module maps an rsync module name to
  ;; the directory it exposes.
  (list (rsync-module
         (name "web-pdf")
         (file-name "/srv/guix-pdfs"))
        (rsync-module
         (name "web-video")
         (file-name "/srv/videos"))
        (rsync-module
         (name "web-audio")
         (file-name "/srv/audio"))
        (rsync-module
         (name "web-cuirass")
         ;; This used to read "/src/cuirass-releases", which looks like a
         ;; typo: every other web-served tree exported here lives under
         ;; "/srv".  NOTE(review): confirm the directory against the nginx
         ;; configuration that serves the Cuirass releases.
         (file-name "/srv/cuirass-releases"))
        (rsync-module
         (name "disarchive")
         (file-name "/gnu/disarchive"))
        (rsync-module
         (name "substitutes")
         (file-name "/var/cache/guix/publish"))))
;;;
;;; Btrfs pools.
;;;
;;; Large Btrfs partition on the MDC-provided SAN storage (100 TiB).
(define %btrfs-san-uuid
  ;; UUID of the Btrfs file system; every Btrfs mount declared in this file
  ;; refers to it.
  "d5d1a040-7f2a-4c38-9a89-82f08866f6ec")

(define %common-btrfs-options
  ;; Mount options shared by all the Btrfs mounts declared in this file, as
  ;; an association list of option name and value.
  (list (cons "compress" "zstd")
        (cons "space_cache" "v2")))
(define %btrfs-pool-san
  ;; Mount of the Btrfs file system identified by %btrfs-san-uuid with
  ;; "subvolid=5" prepended to the common options, at /mnt/btrfs-pool-san.
  (file-system
    (type "btrfs")
    (device (uuid %btrfs-san-uuid))
    (mount-point "/mnt/btrfs-pool-san")
    (options (alist->file-system-options
              `(("subvolid" . "5")
                ,@%common-btrfs-options)))))
(define (btrfs-subvolume-mount device-uuid name mount-point)
  "Return a file system object that mounts, at MOUNT-POINT, the subvolume
NAME of the Btrfs file system whose UUID is DEVICE-UUID.  The mount point is
created when missing, and %COMMON-BTRFS-OPTIONS are applied in addition to
the 'subvol' option."
  (let ((mount-options `(("subvol" . ,name)
                         ,@%common-btrfs-options)))
    (file-system
      (type "btrfs")
      (device (uuid device-uuid))
      (mount-point mount-point)
      (create-mount-point? #t)
      (options (alist->file-system-options mount-options)))))
(define btrfs-balance-job
  ;; mcron job named "btrfs-balance".  It runs 'btrfs balance start
  ;; -dusage=5 /' to re-allocate the chunks using less than 5% of their
  ;; chunk space and thereby regain Btrfs 'unallocated' space.  The usage
  ;; threshold is kept low (5%) to minimize wear on the SSD.  Scheduled at
  ;; 5 AM on every third day of the month, starting from the 1st.
  #~(job '(next-hour-from (next-day (range 1 31 3)) '(5))
         (lambda ()
           (let ((btrfs #$(file-append btrfs-progs "/bin/btrfs")))
             (system* btrfs "balance" "start" "-dusage=5" "/")))
         "btrfs-balance"))
(define (anonip-service file)
  "Return an Anonip service whose input is FILE under /var/run/anonip and
whose output is the file of the same name under /var/log/anonip."
  (let ((source      (format #f "/var/run/anonip/~a" file))
        (destination (format #f "/var/log/anonip/~a" file)))
    (service anonip-service-type
             (anonip-configuration
              (input source)
              (output destination)))))
(define %anonip-log-files
  ;; Base names of the log files handled by Anonip.  One Anonip service is
  ;; instantiated per entry, and the nginx service is made to depend on all
  ;; of them.
  (list "http.access.log"
        "https.access.log"
        "disarchive.access.log"
        "dump-guix-gnu-org.https.access.log"
        "qualif.access.log"
        "bootstrappable.access.log"
        "bootstrappable.https.access.log"
        "workflows-guix-info.access.log"
        "workflows-guix-info.https.access.log"
        "issues-guix-gnu-org.https.access.log"))
(define (log-file->anonip-service-name file)
  "Return, as a symbol, the name of the Anonip service that handles the log
file FILE: the prefix 'anonip-/var/log/anonip/' followed by FILE."
  (string->symbol
   (string-append "anonip-/var/log/anonip/" file)))
;; The operating system declaration for berlin: boot setup, file systems,
;; the local admin account, globally installed packages, and all services.
(operating-system
(host-name "berlin.guix.gnu.org")
(timezone "Europe/Berlin")
(locale "en_US.utf8")
(name-service-switch %mdns-host-lookup-nss)
;; Allow access through the serial console at 141.80.167.201; the
;; management interface can only be accessed through selected
;; servers within the MDC campus network.
(kernel-arguments '("console=tty0"
"console=ttyS0,115200"))
;; The Dell server needs these kernel modules for the
;; RAID controller.
(initrd-modules (append (list "megaraid_sas" "scsi_transport_sas"
"mpt3sas" "libsas"
;; Suggested by 'guix system init' for
;; the SAN storage.
"qla2xxx")
%base-initrd-modules))
;; Show the GRUB menu on the serial interface.
(bootloader (bootloader-configuration
(bootloader grub-efi-bootloader)
(targets '("/boot/efi"))
(terminal-inputs '(serial))
(terminal-outputs '(serial))))
;; /boot and /boot/efi are separate partitions; the root, cache and home
;; file systems are Btrfs subvolumes of the file system identified by
;; %btrfs-san-uuid.
(file-systems (cons*
(file-system
(mount-point "/boot")
(device (uuid "67498a2f-3e32-4e8c-96a5-8a4844ea229c")) ;/dev/sdg3
(type "ext4"))
(file-system
(mount-point "/boot/efi")
(device (uuid "43AE-6859" 'fat)) ;/dev/sdg2
(type "vfat"))
%btrfs-pool-san ;for convenience
(btrfs-subvolume-mount %btrfs-san-uuid "@root" "/")
(btrfs-subvolume-mount %btrfs-san-uuid "@cache" "/var/cache")
(btrfs-subvolume-mount %btrfs-san-uuid "@home" "/home")
%base-file-systems))
;; Local admin account for MDC maintenance.
(users (cons (user-account
(name "bi-admin")
(comment "Local admin")
(group "users")
(supplementary-groups '("wheel"))
(home-directory "/home/bi-admin"))
%base-user-accounts))
(packages (cons* certbot emacs wget iptables
jnettop openssh rsync screen strace
;; This is needed to set GIT_SSL_CAINFO allowing
;; Cuirass to fetch sources via HTTPS.
nss-certs
;; This is for git-receive-pack et al
git-minimal
;; This is for the mumi mailer
msmtp
;; This is for bypassing the firewall...
torsocks
;; This is for a redundant connection to the SAN
multipath-tools
%base-packages))
(services (cons*
;; Run %copy-kernel-and-initrd as an activation snippet.
(simple-service 'copy-kernel+initrd-to-/boot
activation-service-type
%copy-kernel-and-initrd)
;; Install %multipath.conf under the name "multipath.conf".
(simple-service 'etc-multipath.conf
etc-service-type
(list `("multipath.conf" ,%multipath.conf)))
(service static-networking-service-type
(list (static-networking
(addresses
(list
;; Connection to the DMZ for public access
;; This is a 10G port.
(network-address
(device "eno2")
(value "141.80.181.40/24"))
;; Connection to build nodes
(network-address
(device "eno1")
(value "141.80.167.131/26"))
;; Connection to maintenance network
(network-address
(device "eno4")
(value "141.80.167.253/26"))))
(routes
(list (network-route
(destination "default")
(gateway "141.80.181.1")))))))
;; Allow login over serial console.
(agetty-service (agetty-configuration
(tty "ttyS0")
(baud-rate "115200")))
;; Discover substitute servers.
(service avahi-service-type
(avahi-configuration (debug? #t)))
;; Periodically populate the Disarchive database. Store it
;; under /gnu, which is the big drive; talk directly to the
;; local Cuirass instance.
(service disarchive-service-type
(disarchive-configuration
(directory "/gnu/disarchive")
(cuirass-url "http://localhost:8081")))
;; Rsync service for backup purposes. Listen only on the VPN
;; address, i.e. the address given to the WireGuard service below.
(service rsync-service-type
(rsync-configuration
(address "10.0.0.1")
(modules %rsync-modules-for-backup)))
;; DNS
(service knot-service-type
(knot-configuration
(zones (list (knot-zone-configuration
(domain "guix.gnu.org")
(master '("bayfront-master"))
(acl '("notify-allow")))))
(acls (list (knot-acl-configuration
(id "notify-allow")
(address (list bayfront-ip4))
(action '(notify)))))
(remotes (list (knot-remote-configuration
(id "bayfront-master")
(address (list bayfront-ip4)))))))
;; Monitoring
(service prometheus-node-exporter-service-type)
(service zabbix-agent-service-type)
(service zabbix-server-service-type
(zabbix-server-configuration
(include-files '("/root/zabbix-pass"))
(extra-options "AlertScriptsPath=/root/zabbix-alert-scripts\n")))
(service zabbix-front-end-service-type
(zabbix-front-end-configuration
(nginx (list %zabbix-nginx-server
%zabbix-nginx-local-server))
(db-secret-file "/root/zabbix-front-end-secrets")))
;; For the Zabbix database. It was created by manually
;; following the instructions here:
;; https://www.zabbix.com/documentation/3.0/manual/appendix/install/db_scripts
(service postgresql-service-type
(postgresql-configuration
(postgresql postgresql-13)
(config-file
(postgresql-config-file
(extra-config
'(("max_connections" 300)))))))
(service postgresql-role-service-type)
(service ntp-service-type)
;; Make SSH and ci.guix available over Tor.
(tor-hidden-service "http"
'((22 "127.0.0.1:22")
(80 "127.0.0.1:80")
(443 "127.0.0.1:443")))
;; Onion service for the web site.
(tor-hidden-service "guix.gnu.org"
'((80 "127.0.0.1:80")
(443 "127.0.0.1:443")))
(service tor-service-type)
(service nginx-service-type
(nginx-configuration
(inherit %nginx-configuration)
;; Have the nginx shepherd service depend on the
;; Anonip services so that the writing end of the
;; logs, handled by Anonip, is ready when nginx starts
;; running.
(shepherd-requirement
(map log-file->anonip-service-name
%anonip-log-files))))
(service mumi-service-type
(mumi-configuration
;; The mailer is broken again. No pretty bug report
;; like <https://issues.guix.gnu.org/49295>, but it's
;; broken.
(mailer? #f)
(sender "issues.guix.gnu.org@elephly.net")
(smtp "sendmail:///var/mumi/mumi-mailer")))
;; For the Mumi mailer queue
(service redis-service-type)
;; Stop Cuirass when disk space is low.
(service disk-space-watchdog-service-type
(list (* 500 GiB) (* 50 GiB)))
;; VPN connection to the remote build nodes.
(service wireguard-service-type
(wireguard-configuration
(addresses (list "10.0.0.1/32"))
(peers
(list
(wireguard-peer
(name "overdrive1")
(public-key "m2qys8ATAKUTT8YNUM3OmnJnw1lYm5GHpUA42/l1Qz8=")
(allowed-ips '("10.0.0.3/32")))
(wireguard-peer
(name "dover")
(public-key "b3D6Nv5X4npfko99HELdewtKL8LzbbeUAIFjpwU7HhA=")
(allowed-ips '("10.0.0.4/32")))
(wireguard-peer
(name "guix-x15")
(public-key "pM7dAWXJ35isIDJA3OpbR2YG1Pk3MI9VTlP5ELAeQkM=")
(allowed-ips '("10.0.0.5/32")))
(wireguard-peer
(name "guix-x15b")
(public-key "cwRqmMc8IPAHxFCGIt1WcnJnoWJcefcTXte2vMNi6Wo=")
(allowed-ips '("10.0.0.6/32")))
(wireguard-peer
(name "guixp9")
(public-key "4jflGVC+6ee1jsXR/6GgBKjxzw4T4WIwYiMhj/lYQTE=")
(allowed-ips '("10.0.0.7/32")))
(wireguard-peer
(name "pankow")
(public-key "BVfQ20Hh+3WSr5esDaXcoM6T7F809iPuGNSpeD1Qd3I=")
(allowed-ips '("10.0.0.8/32")))
(wireguard-peer
(name "kreuzberg")
(public-key "f9WGJTXp8bozJb0KxePjkOclF5pJUy1AomHWJHy80y4=")
(allowed-ips '("10.0.0.9/32")))
(wireguard-peer
(name "grunewald")
(public-key "icqpTshydmh1TW43YDMRS+dpb8ND6iVy6vLlfwtlGSk=")
(allowed-ips '("10.0.0.10/32")))
(wireguard-peer
(name "bayfront")
(public-key "/oydkAV1bep1JRQ/JRO+tEuybwtwczYlidSP97CnDwI=")
(allowed-ips '("10.0.0.11/32")))
(wireguard-peer
(name "jade")
(public-key "FEFR3NX+DfkrsTHpgECvzW/M/0D8V4bVtCEEzQ5naww=")
(allowed-ips '("10.0.0.12/32")))
(wireguard-peer
(name "sjd-p9")
(public-key "JESZIT1RikNQ+xM1a18pXGvZQoZ3vmVkNA+w/qx1Bzs=")
(allowed-ips '("10.0.0.13/32")))
(wireguard-peer
(name "lieserl")
(public-key "CeRd0ZKjlyMDSMbSes1UQ43lADxWX2X8dS/VFo9qej8=")
(allowed-ips '("10.0.0.14/32")))))))
;; The remaining services: one Anonip service per log file, the web
;; site services, and the front-end services with the mcron
;; configuration extended below.
(append
(map anonip-service %anonip-log-files)
(website-services)
(modify-services
(frontend-services %sysadmins
#:authorized-keys %build-node-keys
;; Make sure we get enough build users (presumably what
;; the ratio below controls -- TODO confirm).
#:build-accounts-to-max-jobs-ratio 5
;; NOTE(review): per the commit log, #f is meant to make the
;; daily garbage collection a full run -- confirm in
;; 'frontend-services'.
#:gc-threshold #f
#:systems '("x86_64-linux" "i686-linux"
"aarch64-linux"
"powerpc64le-linux")
#:motd %motd
#:publish-workers 8
#:max-jobs 20)
;; Add the Btrfs balance job in front of the mcron jobs that are
;; already configured.
(mcron-service-type
config => (mcron-configuration
(inherit config)
(jobs (cons btrfs-balance-job
(mcron-configuration-jobs
config))))))))))