diff --git a/.gitignore b/.gitignore index eb2ee0c..3ab73e2 100644 --- a/.gitignore +++ b/.gitignore @@ -223,3 +223,10 @@ talk.vrb /doc/sif-2021/article-1024.bib /doc/sif-2021/article-1024.pdf /doc/sif-2021/article-1024.tex +*.aux +*.log +*.out +/doc/ccs-2021/supply-chain.tex +/doc/ccs-2021/supply-chain.pdf +/doc/ccs-2021/images/commit-graph-intro.pdf +/doc/ccs-2021/images/commit-graph.pdf diff --git a/doc/ccs-2021/GNUmakefile b/doc/ccs-2021/GNUmakefile new file mode 100644 index 0000000..8ce79ad --- /dev/null +++ b/doc/ccs-2021/GNUmakefile @@ -0,0 +1,21 @@ +SKRIBILO := skribilo +PDFLATEX := pdflatex +RUBBER := rubber +DOT := dot +DOT-OPTS := -Gratio=.78 -Gwidth=8cm + +.DEFAULT_GOAL := supply-chain.pdf +ILLUSTRATIONS := \ + images/commit-graph.pdf \ + images/commit-graph-intro.pdf + +supply-chain.pdf: $(ILLUSTRATIONS) +%.pdf: %.tex + $(RUBBER) --pdf -I $$PWD "$<" + +%.tex: %.skb + $(SKRIBILO) -t latex -o "$@" "$<" + +%.pdf: %.dot + $(DOT) -Tpdf $(DOT-OPTS) < "$<" > "$@.tmp" + mv "$@.tmp" "$@" diff --git a/doc/ccs-2021/categories.tex b/doc/ccs-2021/categories.tex new file mode 100644 index 0000000..5988a97 --- /dev/null +++ b/doc/ccs-2021/categories.tex @@ -0,0 +1,23 @@ +\begin{CCSXML} + + +10011007.10011006.10011041.10011047 +Software and its engineering~Source code generation +500 + + +10011007.10011006.10011008.10011009.10011012 +Software and its engineering~Functional languages +300 + + +10011007.10011074.10011111.10011697 +Software and its engineering~System administration +300 + + +\end{CCSXML} + +\ccsdesc[500]{Software and its engineering~Source code generation} +\ccsdesc[300]{Software and its engineering~Functional languages} +\ccsdesc[300]{Software and its engineering~System administration} diff --git a/doc/ccs-2021/images/commit-graph-intro.dot b/doc/ccs-2021/images/commit-graph-intro.dot new file mode 100644 index 0000000..f3b48c2 --- /dev/null +++ b/doc/ccs-2021/images/commit-graph-intro.dot @@ -0,0 +1,21 @@ +digraph "Grafts" { + A [ label = "A", shape = 
box, fontname = Helvetica, color="#22aa44" ]; + B [ label = "B", shape = box, fontname = Helvetica, color="#22aa44", + style = bold, xlabel = "introductory commit" ]; + C [ label = "C", shape = box, fontname = Helvetica, color="#22aa44" ]; + D [ label = "D", shape = box, fontname = Helvetica, color="#22aa44" ]; + E [ label = "E", shape = box, fontname = Helvetica, color="#22aa44" ]; + F [ label = "F", shape = box, fontname = Helvetica, color="#22aa44" ]; + G [ label = "G", shape = box, fontname = Helvetica, color="#22aa44", + style = filled, fillcolor = "#ddeedd", color = "#ff0000", fontcolor = "#ff0000" ]; + H [ label = "H", shape = box, fontname = Helvetica, color="#22aa44" + style = filled, fillcolor = "#ddeedd", color = "#ff0000", fontcolor = "#ff0000" ]; + + B -> A [ color = orange ]; + C -> B [ color = orange ]; + D -> C [ color = orange ]; + E -> C [ color = darkviolet ]; + F -> E [ color = darkviolet ]; + G -> A [ color = red ]; + H -> G [ color = red ]; +} diff --git a/doc/ccs-2021/images/commit-graph.dot b/doc/ccs-2021/images/commit-graph.dot new file mode 100644 index 0000000..7547ec3 --- /dev/null +++ b/doc/ccs-2021/images/commit-graph.dot @@ -0,0 +1,16 @@ +digraph "Grafts" { + ratio = .4; + A [ label = "A\nauthor: Alice", shape = box, fontname = Helvetica, xlabel = "authorized: Alice", color="#aa4422" ]; + B [ label = "B\nauthor: Alice", shape = box, fontname = Helvetica, xlabel = "authorized: Alice, Bob", color="#aa4422" ]; + C [ label = "C\nauthor: Bob", shape = box, fontname = Helvetica, xlabel = "authorized: Alice, Bob", color="#22aa44" ]; + D [ label = "D\nauthor: Alice", shape = box, fontname = Helvetica, xlabel = "authorized: Alice, Bob", color="#aa4422" ]; + E [ label = "E\nauthor: Bob", shape = box, fontname = Helvetica, xlabel = "authorized: Alice, Bob", color="#22aa44" ]; + F [ label = "F\nauthor: Alice", shape = box, fontname = Helvetica, xlabel = "authorized: Alice, Bob", color="#aa4422" ]; + + B -> A [ color = orange ]; + C -> B [ color = 
orange ]; + D -> C [ color = orange ]; + F -> D [ color = orange ]; + F -> E [ color = darkviolet ]; + E -> B [ color = darkviolet ]; +} \ No newline at end of file diff --git a/doc/ccs-2021/security.sbib b/doc/ccs-2021/security.sbib new file mode 100644 index 0000000..6ec2fa8 --- /dev/null +++ b/doc/ccs-2021/security.sbib @@ -0,0 +1,106 @@ +(article lamb2021:reproducible + (author "Chris Lamb and Stefano Zacchiroli") + (title "Reproducible Builds: Increasing the Integrity of Software Supply Chains") + (publisher "IEEE Computer Society") + (year "2021") + (issn "0740-7459") + (doi "10.1109/MS.2021.3073045") + (journal "IEEE Software")) + +(inproceedings torresarias2016:omitting +(author "Santiago Torres-Arias and Anil Kumar Ammula and Reza Curtmola and Justin Cappos") +(title "On Omitting Commits and Committing Omissions: Preventing Git Metadata Tampering That (Re)introduces Software Vulnerabilities") +(booktitle "25th USENIX Security Symposium") +(year "2016") +(isbn "978-1-931971-32-4") +(address "Austin, TX") +(pages "379--395") +(url "https://www.usenix.org/conference/usenixsecurity16/technical-sessions/presentation/torres-arias") +(publisher "USENIX Association") +(month "August")) + +(inproceedings torresarias2019:intoto +(author "Santiago Torres-Arias and Hammad Afzali and Trishank Karthik Kuppusamy and Reza Curtmola and Justin Cappos") +(title "in-toto: Providing farm-to-table guarantees for bits and bytes") +(booktitle "28th USENIX Security Symposium") +(year "2019") +(isbn "978-1-939133-06-9") +(address "Santa Clara, CA") +(pages "1393--1410") +(url "https://www.usenix.org/conference/usenixsecurity19/presentation/torres-arias") +(publisher "USENIX Association") +(month "Aug")) + +(misc janneke:mes-web + (title "GNU Mes web site") + (author "Jan Nieuwenhuizen") + (url "https://gnu.org/software/mes") + (year "2021")) + +(misc janneke2020:bootstrap + (title "Guix Further Reduces Bootstrap Seed to 25%") + (author "Jan Nieuwenhuizen") + (year "2020") + (month 
"June") + (url "https://guix.gnu.org/en/blog/2020/guix-further-reduces-bootstrap-seed-to-25/")) + +(article thompson1984:trusting-trust + (author "Thompson, Ken") + (title "Reflections on Trusting Trust") + (year "1984") + (issue_date "Aug 1984") + (publisher "Association for Computing Machinery") + (address "New York, NY, USA") + (volume "27") + (number "8") + (issn "0001-0782") + (url "https://doi.org/10.1145/358198.358210") + (doi "10.1145/358198.358210") + (journal "Communications of the ACM") + (month "August") + (pages "761--763")) + +(inproceedings stevens2017:detection + (author "Stevens, Marc and Shumow, Daniel") + (title "Speeding up Detection of SHA-1 Collision Attacks Using Unavoidable Attack Conditions") + (year "2017") + (isbn "9781931971409") + (publisher "USENIX Association") + (address "USA") + (booktitle "Proceedings of the 26th USENIX Conference on Security Symposium") + (pages "881–897") + (numpages "17") + (location "Vancouver, BC, Canada") + (series "SEC'17")) + +(inproceedings stevens2017:collision + (author "Marc Stevens and Elie Bursztein and Pierre Karpman and Ange Albertini and Yarik Markov") + (editor "Katz, Jonathan + and Shacham, Hovav") + (title "The First Collision for Full SHA-1") + (booktitle "Advances in Cryptology -- CRYPTO 2017") + (year "2017") + (publisher "Springer International Publishing") + ;;(address "Cham") + (pages "570--596") + (isbn "978-3-319-63688-7")) + +(inproceedings leurent2020:shambles + (author "Gaëtan Leurent and Thomas Peyrin") + (title "SHA-1 is a Shambles: First Chosen-Prefix Collision on SHA-1 and Application to the PGP Web of Trust") + (booktitle "29th USENIX Security Symposium (USENIX Security 20)") + (year "2020") + (isbn "978-1-939133-17-5") + (pages "1839--1856") + (url "https://www.usenix.org/conference/usenixsecurity20/presentation/leurent") + (publisher "USENIX Association") + (month "August")) + + +#| +(defun skr-from-bibtex () + "Vaguely convert the BibTeX snippets after POINT to SBibTeX." 
+ (interactive) + (while (re-search-forward "\\([a-z_-]+\\) *= *[{\"]\\([^}\"]+\\)[}\"] *, *$" nil nil) + (replace-match "(\\1 \"\\2\")"))) +|# diff --git a/doc/ccs-2021/supply-chain.skb b/doc/ccs-2021/supply-chain.skb new file mode 100644 index 0000000..97fb222 --- /dev/null +++ b/doc/ccs-2021/supply-chain.skb @@ -0,0 +1,724 @@ +(use-modules (skribilo package acmproc) + (skribilo engine) + (skribilo engine latex) + (skribilo ast) + (skribilo writer) + (skribilo output) + (skribilo utils strings) + (skribilo lib) + (skribilo evaluator) + (skribilo biblio author) + (skribilo source) + (skribilo source lisp) + (skribilo source parameters)) + +(define (---) ; emdash + (resolve (lambda (n e env) + (if (engine-format? "html" e) + (! "—") + (! "---"))))) + +(define (--) ; endash + (resolve (lambda (n e env) + (if (engine-format? "html" e) + (! "–") + (! "--"))))) + +(define (dash-dash) + (resolve (lambda (n e env) + (if (engine-format? "latex" e) + (! "{-}{-}") + "--")))) + +(define (url url) + (ref :text (tt url) :url url)) + +(define (=>) + (symbol "=>")) + +;; XXX: Terrible hack to turn hyphens into hyphenation points in 'tt'. +(set! (@@ (skribilo engine latex) latex-tt-encoding) + `((#\- "-\\-") + ,@(@@ (skribilo engine latex) latex-tt-encoding))) + +;; For pdflatex. +(engine-custom-set! (find-engine 'latex) 'image-format '("pdf")) + +;; Avoid "option clash" with acmart. +(engine-custom-set! (find-engine 'latex) 'hyperref #f) + +(engine-custom-set! (find-engine 'latex) 'usepackage + (let ((u (engine-custom (find-engine 'latex) + 'usepackage))) + ;; See + ;; and . + (string-append u "\n" + "\\usepackage{microtype}\n" + ;; "\\usepackage[hypcap]{caption}\n" + ;; "\\DeclareCaptionType{copyrightbox}\n" + "\\usepackage{balance}\n" + + ;; Trick so that ‘…’ is properly + ;; typeset inside teletype text. 
+ "\\DeclareUnicodeCharacter{2026}{\\textrm{\\ldots}}\n" + + "\ +\\fancyhf{} % Remove fancy page headers +\\fancyhead[C]{Anonymous submission \\#9999 to ACM CCS 2021} % TODO: replace 9999 with your paper number +\\fancyfoot[C]{\\thepage} + +\\setcopyright{none} % No copyright notice required for submissions +\\acmConference[Anonymous Submission to ACM CCS 2021]{ACM Conference on Computer and Communications Security}{Due 15 May 2021}{Seoul} +\\acmYear{2021} + +\\settopmatter{printacmref=false, printccs=true, printfolios=true} % We want page numbers on submissions + +%%\\ccsPaper{9999} % TODO: replace with your paper number once obtained + +\\let\\oldthing\\footnotetextcopyrightpermission +\\renewcommand\\footnotetextcopyrightpermission[1]{\\oldthing{ + % Per + % , + % the copyright holder is me personally, not Inria. + Copyright \\copyright 2021 Ludovic Courtès.\\ + + Permission is granted to copy, distribute and/or modify this document + under the terms of the GNU Free Documentation License, Version 1.3 + or any later version published by the Free Software Foundation; + with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. + A copy of the license is + available at \\url{https://www.gnu.org/licenses/gfdl.html}. + + % Give a link to the 'Transparent Copy', as per Section 3 of the GFDL. + The source of this document is available from + \\url{https://git.sv.gnu.org/cgit/guix/maintenance.git}. +}} +"))) + +(let ((latex (find-engine 'latex))) + (engine-custom-set! latex 'documentclass + "\\documentclass[sigplan]{acmart}") + + (engine-custom-set! latex 'maketitle #f) + + (markup-writer '&latex-author latex + :action (lambda (n e) + (let ((body (markup-body n))) + (for-each (lambda (a) + (display "\\author{") + (output (markup-option a :name) e) + (display "}\n\\affiliation{\n") + (display " \\institution{") + (output (markup-option a :affiliation) e) + (display "}\n \\city{") + (output (markup-option a :address) e) + (display "}}\n")) + (if (pair? 
body) body (list body)))))) + + (markup-writer '&acm-category latex + :options '(:index :section :subsection) + :action (lambda (n e) + (display "\\ccsdesc[") + (display (markup-option n :index)) + (display "]") + (display "{") + (display (markup-option n :section)) + (display "}\n"))) + + (markup-writer 'image latex + :options '(:file :url :width :height :zoom) + :action (lambda (n e) + (format #t "\n\\includegraphics[width=~a\\textwidth]{~a}\n" + (or (markup-option n :width) 0.5) + (markup-option n :file)))) + + + (markup-writer 'prog latex + :class "small" + :options '(:line :mark) + :before "\n\n\n\\begin{scriptsize}\n" + :action (lambda (n e) + ;; Delegate actual work to the "real" 'prog'. + (output (prog :line (markup-option n :line) + :mark (markup-option n :mark) + (node-body n)) + e)) + :after "\n\\end{scriptsize}\n") + + (markup-writer 'figure latex + :options '(:legend :number :multicolumns) + :action (lambda (n e) + (let ((ident (markup-ident n)) + (legend (markup-option n :legend)) + (mc (markup-option n :multicolumns))) + (display "\\begin{figure}[ht]\n\\begin{scriptsize}\n") + (output (markup-body n) e) + (display "\n\\end{scriptsize}\n") + (format #t "\\caption{\\label{~a}" + (string-canonicalize ident)) + (output legend e) + (display "}\\end{figure}\n"))))) + +(define (acmart-abstract . body) + (!latex "\\begin{abstract}\n$1\n\\end{abstract}\n" body)) + +(bibliography "../els-2013/guix.sbib") +(bibliography "../reppar-2015/reppar.sbib") +(bibliography "security.sbib") + + +(document :title [Secure Software Supply Chain with GNU Guix] +;; :author (list (author :name "Ludovic Courtès" +;; :affiliation "Inria" +;; :address (list "Bordeaux, France"))) + + (acmart-abstract + + (p [GNU Guix blah blah FIXME.])) + + ;; See . 
+ (!latex "\\input{categories.tex}\n") + + (acm-keywords [Software deployment, Git, ]) + + (!latex "\\maketitle\n") + + (chapter :title [Introduction] + + (p [Package managers and related software deployment tools are in +a key position when it comes to securing the “software supply +chain”—they take source code fresh from repositories and provide users +with ready-to-use binaries. Between source code repositories and +binaries users run, many things can go wrong: binaries can be +compromised on their way to the user's machine, on the provider's +servers, or possibly indirectly ,(it [via]) toolchain compromise +,(ref :bib "thompson1984:trusting-trust"). Every software installation +and every upgrade can put users at risk.]) + + (p [GNU Guix is a set of software deployment tools and a +standalone GNU/Linux distribution; it includes a package manager similar +in spirit to Debian’s apt or Fedora’s yum. Unlike those, Guix builds +upon the ,(emph [functional deployment model]) pioneered by Nix ,(ref +:bib "dolstra2004:nix"), a foundation for reproducible deployment, +reproducible builds, and provenance tracking. Guix is essentially a +“source-based” deployment tool: the ,(emph [model]) is that of a system +where every piece of software is built from source, and pre-built +binaries are viewed as a mere optimization and not as a central aspect +of its design.]) + + (p [This paper describes the design and implementation of Guix’s +secure update mechanism. ,(numref :text [Section] :ident "background") +gives background information necessary to understand the overall +deployment model of Guix. FIXME: complete])) + + (chapter :title [Background] :ident "background" + + (p [Users of free operating systems such as GNU/Linux are used to +,(emph [package managers]) like Debian's ,(tt [apt]), which allow them +to install, upgrade, and remove software from a large collection of free +software packages.
GNU Guix,(footnote (url "https://guix.gnu.org")) is +primarily a ,(emph [functional]) package manager that builds upon the +ideas developed for Nix by Dolstra ,(it [et al.]) ,(ref :bib +'(dolstra2004:nix courtes2013:functional)). The term “functional” means +that software build processes are considered as pure functions: given a +set of inputs (compiler, libraries, build scripts, and so on), a +package’s build function is assumed to always produce the same result. +Build results are stored in an immutable persistent data structure, the +,(emph [store]), implemented as a single directory, ,(tt [/gnu/store]). +Each entry in ,(tt [/gnu/store]) has a file name composed of the hash of +all the build inputs used to produce it, followed by a symbolic name. +For example, ,(tt [/gnu/store/yr9rk90jf…-gcc-10.3.0]) identifies a +specific build of GCC 10.3. A variant of GCC 10.3, for instance one +using different build options or different dependencies, would get a +different hash. Thus, each store file name uniquely identifies build +results, and build processes are ,(emph [referentially transparent]).]) + (p [Guix, like Nix and unlike Debian or Fedora, is essentially a +,(emph [source-based distribution]): Guix package definitions describe +how to build packages from source. When running a command such as ,(tt +[guix install gcc]), Guix proceeds as if it were to build GCC from +source. As an optimization, users can enable fetching pre-built +binaries—called ,(emph [substitutes]) because they are substitutes for a +local build. In that case, instead of building locally, Guix asks one +or more servers for substitutes. In the example above, it would ask +specifically for substitutes for ,(tt +[/gnu/store/yr9rk90jf…-gcc-10.3.0]), which unambiguously identifies the +desired build output. 
Substitutes are cryptographically signed by the +server and Guix rejects substitutes not signed by one of the keys the +user authorized.]) + (p [To maximize chances that build processes actually look like +pure functions, they are spawned in isolated build environments—Linux +,(emph [containers])—ensuring that only explicitly declared inputs are +visible to the build process. This, in turn, helps achieve bit-for-bit +,(emph [reproducible builds]), which are critical from a security +standpoint ,(ref :bib 'lamb2021:reproducible). Reproducible builds +enable users and developers to verify that a binary matches a given +piece of source code: anyone can rebuild the package and ensure they +obtain the same binary, bit for bit. The explicit and unambiguous +mapping from source to binary that the functional deployment model +provides makes verification clear and easy. For example, the command +,(tt [guix build --check hello]) rebuilds the ,(tt [hello]) package +locally and prints an error if the build result differs from that +already available. Likewise, ,(tt [guix challenge hello]) compares +binaries of the ,(tt [hello]) package available locally with those +provided by one or several substitute servers.]) + (p [Are reproducible builds enough to guarantee that one can +verify source-to-binary mappings? In his Turing Award acceptance speech +,(ref :bib 'thompson1984:trusting-trust), Ken Thompson described a +scenario whereby a legitimate-looking build process would produce a +malicious binary—if that build process is reproducible, it just +reproducibly builds a malicious binary. The attack Thompson described, +often referred to as a “Trusting Trust attack”, consists in targeting +the compilation toolchain, typically by modifying the compiler such that +it emits malicious code when it recognizes specific patterns of source +code. This attack can be undetectable.
What makes such attacks +possible is that users and distributions rely on opaque binaries at some +level to “bootstrap” the entire package dependency graph.]) + (p [In 2017, Jan Nieuwenhuizen ,(it [et al.]) sought to address +this forty-year-old problem at its root: by ensuring no opaque binaries +appear in the package dependency graph—no less ,(ref :bib +'janneke:mes-web). To that end, Nieuwenhuizen developed GNU Mes, a +small interpreter of the Scheme language written in C, capable enough to +run MesCC, a non-optimizing C compiler. That, coupled with other heroic +efforts, led to a drastic reduction of the size of the opaque binaries +at the root of the Guix package graph, well below what had been achieved +so far ,(ref :bib 'janneke2020:bootstrap). While many considered it +unrealistic a few years earlier, the initial goal of building ,(emph +[everything]) from source, starting from a small core and incrementally +building more complex pieces of software, is now within reach. This has +the potential to thwart an entire class of software supply chain attacks +that has been known but left unaddressed for forty years.])) + + (chapter :title [Rationale] :ident "rationale" + + (p [As we have seen, Guix is conceptually a source-based +distribution. It addresses common classes of software supply chain +attacks in two ways: by reducing and eventually removing reliance on +opaque binaries at the root of its dependency graph, and by affording +reproducible builds. Guix users can choose to obtain pre-built binaries +for software they install, and reproducible builds guarantee that anyone +can verify that providers of those binaries are not distributing modified +or malicious versions.]) + + (p [The security issue that the remainder of this paper focuses on +is that of ,(emph [distributing updates securely]): how can users know +that updates to Guix and its package collection that they fetch are +genuine?
The problem of securing software updates is often viewed +through the lens of binary distributions such as Debian, where the main +assets to be protected are binaries themselves. Guix being a +source-based distribution, the question has to be approached from a +different angle.]) + + (p [Guix consists of source code for the tools as well as package +definitions that make up the GNU/Linux distribution. All this code is +maintained under version control in a Git repository. To update Guix +and its package collection, users run ,(tt [guix pull])—the equivalent +of ,(tt [apt update]) in Debian. When users run ,(tt [guix pull]), what +happens behind the scenes is equivalent to ,(tt [git clone]) or ,(tt [git +pull]). There are many ways this can go wrong. An attacker can trick +the user into pulling code from an alternate repository that contains +malicious code or definitions for backdoored packages. This is made +more difficult by the fact that code is fetched over HTTPS from Savannah +by default. If Savannah is compromised as happened in 2010,(footnote +[https://www.fsf.org/blogs/sysadmin/savannah-and-www.gnu.org-downtime]), +an attacker can push code to the Guix repository, which everyone would +pull. The change might even go unnoticed and remain in the repository +forever. An attacker with access to Savannah can also reset the main +branch to an earlier revision, leading users to install outdated +software with known vulnerabilities—a ,(emph [downgrade attack]). These +are the kinds of attacks we want to protect against.])) + + (chapter :title [Authenticating Git checkouts] + :ident "authenticating" + + (p [If we take a step back, the problem we are trying to solve is +not specific to Guix and to software deployment tools: it’s about +,(emph [authenticating Git checkouts]). By that, we mean that when ,(tt [guix pull]) +obtains code from Git, it should be able to tell that all the commits it +fetched were pushed by authorized developers of the project.
We are +really looking at individual commits, not tags, because users can choose +to pull arbitrary points in the commit history of Guix and third-party +channels.]) + + (p [Checkout authentication requires cryptographically signed +commits,(footnote +[https://git-scm.com/book/en/v2/Git-Tools-Signing-Your-Work]). By +signing a commit, a Guix developer asserts that they are the one who +made the commit; they may be its author, or they may be the person who +applied somebody else’s changes after review. It also requires a notion +of authorization: we don’t simply want commits to have a valid +signature, we want them to be signed by an authorized key. The set of +authorized keys changes over time as people join and leave the project.]) + + (p [To implement that, we came up with the following mechanism and rule: + +,(enumerate + (item [The repository contains a ,(tt [.guix-authorizations]) + file + that lists the OpenPGP key fingerprints of authorized committers.]) + (item [A commit is considered authentic if and only if it is signed by one + of the keys listed in the ,(tt [.guix-authorizations]) file of each of + its parents. This is the ,(emph [authorization invariant]).])) + +(Remember that Git commits form a directed acyclic graph (DAG) where +each commit can have zero or more parents; merge commits have two parent +commits, for instance. Do not miss ,(emph [Git for Computer +Scientists]),(footnote [https://eagain.net/articles/git-for-computer-scientists/]) +for a pedagogical overview!)]) + + (figure + :legend [Graph of commits and the associated authorizations.] + :ident "fig-commits" + (image :file "images/commit-graph.pdf")) + + (p [Let’s take an example to illustrate.
In ,(numref :text +[Figure] :ident "fig-commits"), each box is a commit, and each arrow is +a parent relationship.]) + + (p [This figure shows two lines of development: the orange line may be the +main development branch, while the purple line may correspond to a +feature branch that was eventually merged in commit ,(it [F]). ,(it [F]) is a merge +commit, so it has two parents: ,(it [D]) and ,(it [E]).]) + + (p [Labels next to boxes show who’s in ,(tt [.guix-authorizations]): for commit A, +only Alice is an authorized committer, and for all the other commits, +both Bob and Alice are authorized committers. For each commit, we see +that the authorization invariant holds; for example: + +,(itemize + (item [commit ,(it [B]) was made by Alice, who was the only authorized committer + in its parent, commit ,(it [A]);]) + (item [commit ,(it [C]) was made by Bob, who was among the authorized committers + as of commit ,(it [B]);]) + (item [commit ,(it [F]) was made by Alice, who was among the authorized + committers of both parents, commits ,(it [D]) and ,(it [E]).])) + +The authorization invariant has the nice property that it’s simple to +state, and it’s simple to check and enforce. This is what ,(tt [guix pull]) +implements. If your current Guix, as returned by ,(tt [guix describe]) is at +commit ,(it [A]) and you want to pull to commit ,(it [F]), ,(tt [guix pull]) traverses all +these commits and checks the authorization invariant.]) + + (p [Once a commit has been authenticated, all the commits in its +transitive closure are known to be already authenticated. ,(tt [guix pull]) +keeps a local cache of the commits it has previously authenticated, +which allows it to traverse only new commits. 
For instance, if you are +at commit ,(it [F]) and later update to a descendant of ,(it [F]), authentication +starts at ,(it [F]).]) + + (p [Since ,(tt [.guix-authorizations]) is a regular file under version +control, granting or revoking commit authorization does not require +special support. In the example above, commit ,(it [B]) is an authorized +commit by Alice that adds Bob’s key to ,(tt [.guix-authorizations]). +Revocation is similar: any authorized committer can remove entries from +,(tt [.guix-authorizations]). Key rotation can be handled similarly: a +committer can remove their former key and add their new key in a single +commit, signed by the former key.]) + + (p [The authorization invariant satisfies our needs for Guix. It +has one downside: it prevents pull-request-style workflows. Indeed, +merging the branch of a contributor not listed in ,(tt [.guix-authorizations]) +would break the authorization invariant. It’s a good tradeoff for Guix +because our workflow relies on patches carved into stone tablets +,(footnote [https://lwn.net/Articles/702177/]) (patch tracker,(footnote +[https://issues.guix.gnu.org/])), but it’s not suitable for every +project out there.])) + + (chapter :title [Bootstrapping] :ident "bootstrapping" + + (p [The attentive reader may have noticed that something’s missing +from the explanation above: what do we do about commit ,(it [A]) in the +example above? In other words, which commit do we pick as the first one +where we can start verifying the authorization invariant?]) + + (figure + :legend [The introductory commit in a graph.] + :ident "fig-commit-graph-intro" + (image :file "images/commit-graph-intro.pdf")) + + (p [We solve this bootstrapping issue by defining ,(emph [channel introductions]). +Previously, one would identify a channel simply by its URL. 
Now, when +introducing a channel to users, one needs to provide an additional piece +of information: the first commit where the authorization invariant +holds, and the fingerprint of the OpenPGP key used to sign that commit +(it’s not strictly necessary but provides an additional check).]) + + (p [Consider the commit graph in ,(numref :text [Figure] :ident +"fig-commit-graph-intro"). In this figure, ,(it [B]) is the introduction commit. Its +ancestors, such as ,(it [A]), are considered authentic. To authenticate ,(it [C]), +,(it [D]), ,(it [E]), and ,(it [F]), we check the authorization invariant.]) + + (p [As always when it comes to establishing trust, distributing +channel introductions is very sensitive. The introduction of the +official ,(tt [guix]) channel is built into Guix. Users obtain it when they +install Guix the first time; hopefully they verify the signature on the +Guix tarball or ISO image, as noted in the installation instructions, +which reduces chances of getting the “wrong” Guix, but it is still very +much trust-on-first-use (TOFU).]) + + (figure + :legend [Specification of a channel along with its ,(emph [introduction]).] + :ident "fig-channel-spec" + + (prog :line #f + (source :language scheme [ +(channel + (name 'my-channel) + (url "https://example.org/my-channel.git") + (introduction + (make-channel-introduction + "6f0d8cc0d88abb59c324b2990bfee2876016bb86" + (openpgp-fingerprint + "CABB A931 C0FF EEC6 900D 0CFB 090B 1199 3D9A EBB5"))))]))) + + (p [Guix supports third-party channels providing extra software +packages. To use a third-party channel, one needs to add it to the ,(tt +[~/.config/guix/channels.scm]) configuration file, which contains a +declarative Scheme code snippet listing the desired channels.
Authors +of third-party channels can also benefit from the channel authentication +mechanism: they need to sign commits, to include a ,(tt +[.guix-authorizations]) file and the list of relevant OpenPGP keys, and +to advertise the channel's introduction. Users then have to provide the +channel’s introduction in their ,(tt [channels.scm]) file, as shown in +,(numref :text [Figure] :ident "fig-channel-spec").]) + + (p [The ,(tt [guix describe]) command now prints the introduction if +there’s one. That way, one can share their channel configuration, +including introductions, without having to be an expert.]) + + (p [Channel introductions also solve another problem: forks. +Respecting the authorization invariant “forever” would effectively +prevent “unauthorized” forks—forks made by someone who’s not in +,(tt [.guix-authorizations]). Someone publishing a fork simply needs to emit +a new introduction for their fork, pointing to a different starting +commit.]) + + (p [Last, channel introductions give a ,(emph [point of reference]): if an +attacker manipulates branch heads on Savannah to have them point to +unrelated commits (such as commits on an orphan branch that do not share +any history with the “official” branches), authentication will +necessarily fail as it stumbles upon the first unauthorized commit made +by the attacker. In the figure above, the red branch with commits ,(it [G]) +and ,(it [H]) cannot be authenticated because it starts from ,(it [A]), which lacks +,(tt [.guix-authorizations]) and thus fails the authorization invariant.]) + + (p [That’s all for authentication! I’m glad you read this far. +At this point you can take a break or continue with the next section on +how ,(tt [guix pull]) prevents downgrade attacks.])) + + (chapter :title [Downgrade attacks] :ident "downgrade" + + (p [An important threat for software deployment tools is +,(emph [downgrade]) or ,(emph [roll-back]) attacks. 
The attack consists in tricking +users into installing older, known-vulnerable software packages, which +in turn may offer new ways to break into their system. This is not +strictly related to the authentication issue we’ve been discussing, +except that it’s another important issue in this area that we took the +opportunity to address.]) + + (p [Guix saves provenance info for itself: ,(tt [guix describe]) prints that +information, essentially the Git commits of the channels used during +,(tt [guix pull]):] + + (prog :class "small" :line #f [ +$ guix describe +Generation 149 Jun 17 2020 20:00:14 (current) + guix 8b1f7c0 + repository URL: https://git.savannah.gnu.org/git/guix.git + branch: master + commit: 8b1f7c03d239ca703b56f2a6e5f228c79bc1857e +]) + +[Thus, ,(tt [guix pull]), once it has retrieved the latest commit of the +selected branch, can verify that it is doing a ,(emph [fast-forward update]) in +Git parlance—just like ,(tt [git pull]) does, but compared to the +previously-deployed Guix. A fast-forward update is when the new commit +is a descendant of the current commit. Going back to the figure above, +going from commit ,(it [A]) to commit ,(it [F]) is a fast-forward update, but going +from ,(it [F]) to ,(it [A]) or from ,(it [D]) to ,(it [E]) is not.]) + + (p [Not doing a fast-forward update would mean that the user is deploying an +older version of the Guix currently used, or deploying an unrelated +version from another branch. In both cases, the user is at risk of +ending up installing older, vulnerable software.]) + + (p [By default ,(tt [guix pull]) now errors out on non-fast-forward updates, +thereby protecting from roll-backs. Users who understand the +risks can override that by passing +,(tt [--allow-downgrades]).])) + + (chapter :title [Mirrors and the risk of staleness] + :ident "mirrors" + + (p [Authentication and roll-back prevention allow users to safely +refer to mirrors of the Git repository. 
If the official repository at +,(tt [git.savannah.gnu.org]) is down, one can still update by fetching +from a mirror, for instance with:] + + (prog :line #f :class "small" [ +guix pull --url=https://github.com/guix-mirror/guix +]) + +[If the repository at this URL is behind what the user already deployed, +or if it’s not a genuine mirror, ,(tt [guix pull]) will abort. In other +cases, it will proceed.]) + + (p [Unfortunately, there is no way to answer the general question +“,(it [is]) X ,(it [the latest commit of branch]) B ,(it [?])”. +Rollback detection prevents just that, rollbacks, but there is no +mechanism in place to tell whether a given mirror is stale. To mitigate +that, channel authors can specify, in the repository, the channel’s +,(emph [primary URL]). This piece of information lives in the +,(tt [.guix-channel]) file, in the repository, so it’s authenticated. ,(tt +[guix pull]) uses it to print a warning when the user pulls from a +mirror:] + + (prog :line #f :class "small" [ +$ guix pull --url=https://github.com/guix-mirror/guix +Updating channel 'guix' from 'https://github.com/guix-mirror/guix'... +Authenticating channel 'guix', commits 9edb3f6 to 3e51f9e... +guix pull: warning: pulled channel 'guix' from a mirror of + https://git.savannah.gnu.org/git/guix.git, which might be stale + +Building from this channel: + guix https://github.com/guix-mirror/guix 3e51f9e +… +])) + + (p [So far we talked about mechanics in a rather abstract way. That might +satisfy the graph theorist or the Git geek in you, but if you are up for +a quick tour of the implementation, the next section is for you!])) + + (chapter :title [Implementation]) + + (chapter :title [SHA-1] :ident "sha1" + + (p [We can’t really discuss Git commit signing without mentioning +SHA-1. The venerable cryptographic hash function is approaching end of +life, as evidenced by recent breakthroughs ,(ref :bib +'(stevens2017:collision leurent2020:shambles)). 
Signing a Git commit +boils down to signing a SHA-1 hash, because all objects in the Git store +are identified by their SHA-1 hash.]) + + (p [Git now relies on a collision attack detection library ,(ref +:bib 'stevens2017:detection) to mitigate practical attacks. +Furthermore, the Git project is planning a hash function +transition,(footnote +[https://git-scm.com/docs/hash-function-transition/]) to address the +problem.]) + + (p [Some projects such as Bitcoin Core choose to not rely on SHA-1 +at all. Instead, for the commits they sign, they include in the commit +log the SHA512 hash of the tree, which the verification scripts +check,(footnote +[https://github.com/bitcoin/bitcoin/tree/master/contrib/verify-commits]). +Computing a tree hash ,(emph [for each commit]) in Guix would probably be +prohibitively costly. For now, for lack of a better solution, we rely +on Git’s collision attack detection and look forward to a hash function +transition.]) + + (p [As for SHA-1 in an OpenPGP context: our authentication code +rejects SHA-1 OpenPGP signatures, as recommended.])) + + (chapter :title [Related Work] + :ident "related" + + (p [A lot of work has gone into securing the software supply chain, often in +the context of binary distros, sometimes in a more general context; more +recent work also looks into Git authentication and related issues. +This section attempts to summarize how Guix relates to similar work that +we are aware of in these two areas. More detailed discussions can be +found in the issue tracker,(footnote [https://issues.guix.gnu.org/22883]).]) + + (p [The Update Framework,(footnote +[https://theupdateframework.io/]) (TUF) is a reference for secure update +systems, with a well-structured spec,(footnote +[https://github.com/theupdateframework/specification/blob/master/tuf-spec.md#the-update-framework-specification]) +and a number of implementations. TUF is a great source of inspiration +to think about this problem space. 
Many of its goals are shared by +Guix. Not all the attacks it aims to protect against (Section 1.5.2 of +the spec) are addressed by what’s presented in this post: ,(emph [indefinite +freeze attacks]), where updates never become available, are not addressed +,(emph [per se]) (though easily observable), and ,(emph [slow retrieval attacks]) aren’t +addressed either. The notion of ,(emph [role]) is also something currently +missing from the Guix authentication model, where any authorized +committer can touch any files, though the model and +,(tt [.guix-authorizations]) format leave room for such an extension.]) + + (p [However, both in its goals and system descriptions, TUF is +biased towards systems that distribute binaries as plain files with +associated meta-data. That creates a fundamental impedance mismatch. +As an example, attacks such as ,(emph [fast-forward attacks]) or ,(emph +[mix-and-match attacks]) don’t apply in the context of Guix; likewise, +the ,(emph [repository]) depicted in Section 3 of the spec has little in +common with a Git repository.]) + + (p [Developers of OPAM, the OCaml package manager, adapted TUF for +use with their Git-based package repository,(footnote +[http://opam.ocaml.org/blog/Signing-the-opam-repository/]), later +updated to write Conex,(footnote [https://github.com/hannesm/conex]), a +separate tool to authenticate OPAM repositories. OPAM is interesting +because like Guix it’s a source distro and its package +repository,(footnote [https://github.com/ocaml/opam-repository]) is a +Git repository containing “build recipes”. To date, it appears that +,(tt [opam update]) itself does not authenticate repositories though; it’s up +to users or developers to run Conex.]) + + (p [Another interesting approach is to focus on the impact of +malicious modifications to Git repository meta-data ,(ref :bib +"torresarias2016:omitting"). 
An attacker with access to the repository +can modify, for instance, branch references, to cause a rollback attack +or a “teleport” attack, causing users to pull an older commit or an +unrelated commit. As written above, ,(tt [guix pull]) would detect such +attacks. However, ,(tt [guix pull]) would fail to detect cases where +metadata modification does not yield a rollback or teleport, yet gives +users a different view than the intended one—for instance, a user is +directed to an authentic but different branch rather than the intended +one. The “secure push” operation and the associated ,(emph [reference +state log]) (RSL) the authors propose would be an improvement.])) + + (chapter :title [Conclusion and outlook] + :ident "conclusion" + + (p [Guix now has a mechanism that allows it to authenticate +updates. If you’ve run ,(tt [guix pull]) recently, perhaps you’ve noticed +additional output and a progress bar as new commits are being +authenticated. Apart from that, the switch has been completely +transparent. The authentication mechanism is built around the commit +graph of Git; in fact, it’s a mechanism to ,(emph [authenticate Git checkouts]) +and in that sense it is not tied to Guix and its application domain. It +is available not only for the main ,(tt [guix]) channel, but also for +third-party channels.]) + + (p [To bootstrap trust, we added the notion of ,(emph [channel +introductions]). These are now visible in the user interface, in +particular in the output of ,(tt [guix describe]) and in the configuration +file of ,(tt [guix pull]) and ,(tt [guix time-machine]). While channel +configuration remains a few lines of code that users typically paste, +this extra bit of configuration might be intimidating. 
It certainly +gives an incentive to provide a command-line interface to manage the +user’s list of channels: ,(tt [guix channel add]), etc.]) + + (p [The solution here is built around the assumption that Guix is +fundamentally a source-based distribution, and is thus completely +orthogonal to the public key infrastructure (PKI) Guix uses for the +signature of substitutes. Yet, the substitute PKI could probably +benefit from the fact that we now have a secure update mechanism for the +Guix source code: since ,(tt [guix pull]) can securely retrieve a new +substitute signing key, perhaps it could somehow handle substitute +signing key revocation and delegation automatically? Related to that, +channels could perhaps advertise a substitute URL and its signing key, +possibly allowing users to register those when they first pull from the +channel. All this requires more thought, but it looks like there are +new opportunities here.])) + + ;; in-toto: https://www.usenix.org/conference/usenixsecurity19/presentation/torres-arias + + (!latex "\n\\newpage\n\\balance\n") + (references)) + +;;; Local Variables: +;;; coding: utf-8 +;;; ispell-local-dictionary: "american" +;;; compile-command: "make supply-chain.pdf" +;;; comment-start: ";;" +;;; End: