doc: First stab at a "Secure Supply Chain" paper.

* doc/ccs-2021: New directory.
This commit is contained in:
Ludovic Courtès 2021-04-29 15:35:38 +02:00 committed by Ludovic Courtès
parent 601691e7ea
commit 7d4365e9df
No known key found for this signature in database
GPG Key ID: 090B11993D9AEBB5
7 changed files with 918 additions and 0 deletions

7
.gitignore vendored
View File

@ -223,3 +223,10 @@ talk.vrb
/doc/sif-2021/article-1024.bib
/doc/sif-2021/article-1024.pdf
/doc/sif-2021/article-1024.tex
*.aux
*.log
*.out
/doc/ccs-2021/supply-chain.tex
/doc/ccs-2021/supply-chain.pdf
/doc/ccs-2021/images/commit-graph-intro.pdf
/doc/ccs-2021/images/commit-graph.pdf

21
doc/ccs-2021/GNUmakefile Normal file
View File

@ -0,0 +1,21 @@
# Build the paper: Skribilo source -> LaTeX -> PDF, plus the Graphviz
# illustrations that the document includes.

# Tools; each can be overridden on the command line, e.g. "make DOT=...".
SKRIBILO := skribilo
PDFLATEX := pdflatex
RUBBER   := rubber
DOT      := dot

# Graph-level attributes passed to every 'dot' invocation.
DOT-OPTS := -Gratio=.78 -Gwidth=8cm

.DEFAULT_GOAL := supply-chain.pdf

ILLUSTRATIONS :=				\
  images/commit-graph.pdf			\
  images/commit-graph-intro.pdf

# The paper embeds the illustrations and \input's "categories.tex", so it
# must be rebuilt when any of them changes.
supply-chain.pdf: $(ILLUSTRATIONS) categories.tex

# The local bibliography is read when the LaTeX file is generated.
supply-chain.tex: security.sbib

# Recipe lines below MUST start with a tab character.
%.pdf: %.tex
	$(RUBBER) --pdf -I $$PWD "$<"

%.tex: %.skb
	$(SKRIBILO) -t latex -o "$@" "$<"

# Write to a temporary file first so that a failed 'dot' run does not leave
# behind a truncated PDF that 'make' would consider up to date.
%.pdf: %.dot
	$(DOT) -Tpdf $(DOT-OPTS) < "$<" > "$@.tmp"
	mv "$@.tmp" "$@"

View File

@ -0,0 +1,23 @@
% ACM Computing Classification System (CCS 2012) concepts for the paper:
% the machine-readable XML block followed by the matching \ccsdesc
% declarations.  The bracketed number in each \ccsdesc repeats the
% <concept_significance> value of the corresponding XML entry.
%
% NOTE(review): the concepts listed are code generation, functional
% languages and system administration; confirm whether a security-related
% concept should be added for this paper.
\begin{CCSXML}
<ccs2012>
<concept>
<concept_id>10011007.10011006.10011041.10011047</concept_id>
<concept_desc>Software and its engineering~Source code generation</concept_desc>
<concept_significance>500</concept_significance>
</concept>
<concept>
<concept_id>10011007.10011006.10011008.10011009.10011012</concept_id>
<concept_desc>Software and its engineering~Functional languages</concept_desc>
<concept_significance>300</concept_significance>
</concept>
<concept>
<concept_id>10011007.10011074.10011111.10011697</concept_id>
<concept_desc>Software and its engineering~System administration</concept_desc>
<concept_significance>300</concept_significance>
</concept>
</ccs2012>
\end{CCSXML}
\ccsdesc[500]{Software and its engineering~Source code generation}
\ccsdesc[300]{Software and its engineering~Functional languages}
\ccsdesc[300]{Software and its engineering~System administration}

View File

@ -0,0 +1,21 @@
// Commit graph with an introductory commit (B).  The green nodes A-F are the
// regular history; G and H (drawn in red) form a separate branch rooted at A.
digraph "Grafts" {
  // Attributes shared by every commit node.  Individual nodes only list
  // what differs, so no attribute is ever specified twice on one node.
  node [ shape = box, fontname = Helvetica, color = "#22aa44" ];

  A [ label = "A" ];
  B [ label = "B", style = bold, xlabel = "introductory commit" ];
  C [ label = "C" ];
  D [ label = "D" ];
  E [ label = "E" ];
  F [ label = "F" ];

  // Red outline and text override the default green.
  G [ label = "G", style = filled, fillcolor = "#ddeedd",
      color = "#ff0000", fontcolor = "#ff0000" ];
  H [ label = "H", style = filled, fillcolor = "#ddeedd",
      color = "#ff0000", fontcolor = "#ff0000" ];

  // Edges point from a commit to its parent.
  B -> A [ color = orange ];
  C -> B [ color = orange ];
  D -> C [ color = orange ];
  E -> C [ color = darkviolet ];
  F -> E [ color = darkviolet ];
  G -> A [ color = red ];
  H -> G [ color = red ];
}

View File

@ -0,0 +1,16 @@
// Commit graph annotated with each commit's author (node label) and the set
// of authorized committers at that commit (external label).
digraph "Grafts" {
  ratio = .4;

  // Every commit is drawn as a Helvetica-labelled box.
  node [ shape = box, fontname = Helvetica ];

  A [ label = "A\nauthor: Alice", xlabel = "authorized: Alice",
      color = "#aa4422" ];
  B [ label = "B\nauthor: Alice", xlabel = "authorized: Alice, Bob",
      color = "#aa4422" ];
  C [ label = "C\nauthor: Bob", xlabel = "authorized: Alice, Bob",
      color = "#22aa44" ];
  D [ label = "D\nauthor: Alice", xlabel = "authorized: Alice, Bob",
      color = "#aa4422" ];
  E [ label = "E\nauthor: Bob", xlabel = "authorized: Alice, Bob",
      color = "#22aa44" ];
  F [ label = "F\nauthor: Alice", xlabel = "authorized: Alice, Bob",
      color = "#aa4422" ];

  // Edges point from a commit to its parent; F has two parents (D and E).
  B -> A [ color = orange ];
  C -> B [ color = orange ];
  D -> C [ color = orange ];
  F -> D [ color = orange ];
  F -> E [ color = darkviolet ];
  E -> B [ color = darkviolet ];
}

106
doc/ccs-2021/security.sbib Normal file
View File

@ -0,0 +1,106 @@
(article lamb2021:reproducible
(author "Chris Lamb and Stefano Zacchiroli")
(title "Reproducible Builds: Increasing the Integrity of Software Supply Chains")
(publisher "IEEE Computer Society")
(year "2021")
(issn "0740-7459")
(doi "10.1109/MS.2021.3073045")
(journal "IEEE Software"))
(inproceedings torresarias2016:omitting
(author "Santiago Torres-Arias and Anil Kumar Ammula and Reza Curtmola and Justin Cappos")
(title "On Omitting Commits and Committing Omissions: Preventing Git Metadata Tampering That (Re)introduces Software Vulnerabilities")
(booktitle "25th USENIX Security Symposium")
(year "2016")
(isbn "978-1-931971-32-4")
(address "Austin, TX")
(pages "379--395")
(url "https://www.usenix.org/conference/usenixsecurity16/technical-sessions/presentation/torres-arias")
(publisher "USENIX Association")
(month "August"))
;; in-toto paper.  The month is spelled out in full, like every other entry
;; in this file.
(inproceedings torresarias2019:intoto
  (author "Santiago Torres-Arias and Hammad Afzali and Trishank Karthik Kuppusamy and Reza Curtmola and Justin Cappos")
  (title "in-toto: Providing farm-to-table guarantees for bits and bytes")
  (booktitle "28th USENIX Security Symposium")
  (year "2019")
  (isbn "978-1-939133-06-9")
  (address "Santa Clara, CA")
  (pages "1393--1410")
  (url "https://www.usenix.org/conference/usenixsecurity19/presentation/torres-arias")
  (publisher "USENIX Association")
  (month "August"))
(misc janneke:mes-web
(title "GNU Mes web site")
(author "Jan Nieuwenhuizen")
(url "https://gnu.org/software/mes")
(year "2021"))
(misc janneke2020:bootstrap
(title "Guix Further Reduces Bootstrap Seed to 25%")
(author "Jan Nieuwenhuizen")
(year "2020")
(month "June")
(url "https://guix.gnu.org/en/blog/2020/guix-further-reduces-bootstrap-seed-to-25/"))
(article thompson1984:trusting-trust
(author "Thompson, Ken")
(title "Reflections on Trusting Trust")
(year "1984")
(issue_date "Aug 1984")
(publisher "Association for Computing Machinery")
(address "New York, NY, USA")
(volume "27")
(number "8")
(issn "0001-0782")
(url "https://doi.org/10.1145/358198.358210")
(doi "10.1145/358198.358210")
(journal "Communications of the ACM")
(month "August")
(pages "761--763"))
;; SHA-1 collision detection paper.  The page range is written with "--" like
;; the other entries; 881--897 agrees with the 'numpages' value of 17.
(inproceedings stevens2017:detection
  (author "Stevens, Marc and Shumow, Daniel")
  (title "Speeding up Detection of SHA-1 Collision Attacks Using Unavoidable Attack Conditions")
  (year "2017")
  (isbn "9781931971409")
  (publisher "USENIX Association")
  (address "USA")
  (booktitle "Proceedings of the 26th USENIX Conference on Security Symposium")
  (pages "881--897")
  (numpages "17")
  (location "Vancouver, BC, Canada")
  (series "SEC'17"))
;; First full SHA-1 collision.  The editor list is kept on a single line so
;; that the string contains no embedded line break between the names.
(inproceedings stevens2017:collision
  (author "Marc Stevens and Elie Bursztein and Pierre Karpman and Ange Albertini and Yarik Markov")
  (editor "Katz, Jonathan and Shacham, Hovav")
  (title "The First Collision for Full SHA-1")
  (booktitle "Advances in Cryptology -- CRYPTO 2017")
  (year "2017")
  (publisher "Springer International Publishing")
  ;;(address "Cham")
  (pages "570--596")
  (isbn "978-3-319-63688-7"))
(inproceedings leurent2020:shambles
(author "Gaëtan Leurent and Thomas Peyrin")
(title "SHA-1 is a Shambles: First Chosen-Prefix Collision on SHA-1 and Application to the PGP Web of Trust")
(booktitle "29th USENIX Security Symposium (USENIX Security 20)")
(year "2020")
(isbn "978-1-939133-17-5")
(pages "1839--1856")
(url "https://www.usenix.org/conference/usenixsecurity20/presentation/leurent")
(publisher "USENIX Association")
(month "August"))
#|
(defun skr-from-bibtex ()
"Vaguely convert the BibTeX snippets after POINT to SBibTeX."
(interactive)
(while (re-search-forward "\\([a-z_-]+\\) *= *[{\"]\\([^}\"]+\\)[}\"] *, *$" nil nil)
(replace-match "(\\1 \"\\2\")")))
|#

View File

@ -0,0 +1,724 @@
(use-modules (skribilo package acmproc)
(skribilo engine)
(skribilo engine latex)
(skribilo ast)
(skribilo writer)
(skribilo output)
(skribilo utils strings)
(skribilo lib)
(skribilo evaluator)
(skribilo biblio author)
(skribilo source)
(skribilo source lisp)
(skribilo source parameters))
(define (---)
  ;; Em dash: "&mdash;" when the engine format is HTML, "---" for any other
  ;; engine.  Either way the text is emitted through '!'.
  (resolve (lambda (n e env)
             (! (if (engine-format? "html" e)
                    "&mdash;"
                    "---")))))
(define (--)
  ;; En dash: "&ndash;" when the engine format is HTML, "--" for any other
  ;; engine.  Either way the text is emitted through '!'.
  (resolve (lambda (n e env)
             (! (if (engine-format? "html" e)
                    "&ndash;"
                    "--")))))
(define (dash-dash)
  ;; Two literal hyphens.  For the LaTeX engine each hyphen is wrapped in
  ;; braces and emitted through '!'; other engines get the plain string "--".
  (resolve (lambda (n e env)
             (cond ((engine-format? "latex" e)
                    (! "{-}{-}"))
                   (else
                    "--")))))
(define (url url)
  ;; Return a reference to URL whose visible text is URL itself, typeset in
  ;; teletype.  Within the body, 'url' denotes the argument, not this
  ;; procedure.
  (ref :text (tt url) :url url))
(define (=>)
  ;; Return the symbol markup named "=>".
  (symbol "=>"))
;; XXX: Terrible hack to turn hyphens into hyphenation points in 'tt'.
;; This reaches into the private 'latex-tt-encoding' binding of the LaTeX
;; engine module (via '@@') and prepends an entry that maps the character "-"
;; to the string "-\-", keeping all the existing entries after it.
(set! (@@ (skribilo engine latex) latex-tt-encoding)
      `((#\- "-\\-")
        ,@(@@ (skribilo engine latex) latex-tt-encoding)))

;; For pdflatex.
(engine-custom-set! (find-engine 'latex) 'image-format '("pdf"))

;; Avoid "option clash" with acmart.
(engine-custom-set! (find-engine 'latex) 'hyperref #f)
;; Extend the LaTeX engine's 'usepackage custom: keep its current value and
;; append extra packages plus the preamble needed for an anonymous ACM CCS
;; 2021 submission (page headers and footers, conference metadata, and a
;; redefinition of \footnotetextcopyrightpermission carrying the license
;; notice).
;;
;; NOTE(review): in the copyright line, the Scheme escape "\\" directly before
;; the line break yields a single backslash followed by a newline in the
;; generated LaTeX; confirm whether a LaTeX line break ("\\\\" in Scheme) was
;; intended.
(engine-custom-set! (find-engine 'latex) 'usepackage
   (let ((u (engine-custom (find-engine 'latex)
                           'usepackage)))
     ;; See <https://en.wikibooks.org/wiki/LaTeX/Labels_and_Cross-referencing>
     ;; and <http://tug.org/pipermail/texhax/2010-September/015596.html>.
     (string-append u "\n"
                    "\\usepackage{microtype}\n"
                    ;; "\\usepackage[hypcap]{caption}\n"
                    ;; "\\DeclareCaptionType{copyrightbox}\n"
                    "\\usepackage{balance}\n"
                    ;; Trick so that ‘…’ is properly
                    ;; typeset inside teletype text.
                    "\\DeclareUnicodeCharacter{2026}{\\textrm{\\ldots}}\n"
                    "\
\\fancyhf{} % Remove fancy page headers
\\fancyhead[C]{Anonymous submission \\#9999 to ACM CCS 2021} % TODO: replace 9999 with your paper number
\\fancyfoot[C]{\\thepage}
\\setcopyright{none} % No copyright notice required for submissions
\\acmConference[Anonymous Submission to ACM CCS 2021]{ACM Conference on Computer and Communications Security}{Due 15 May 2021}{Seoul}
\\acmYear{2021}
\\settopmatter{printacmref=false, printccs=true, printfolios=true} % We want page numbers on submissions
%%\\ccsPaper{9999} % TODO: replace with your paper number once obtained
\\let\\oldthing\\footnotetextcopyrightpermission
\\renewcommand\\footnotetextcopyrightpermission[1]{\\oldthing{
% Per
% <https://www.legifrance.gouv.fr/affichCodeArticle.do?cidTexte=LEGITEXT000006069414&idArticle=LEGIARTI000006278959&dateTexte=20170831>,
% the copyright holder is me personally, not Inria.
Copyright \\copyright 2021 Ludovic Courtès.\\
Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.3
or any later version published by the Free Software Foundation;
with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.
A copy of the license is
available at \\url{https://www.gnu.org/licenses/gfdl.html}.
% Give a link to the 'Transparent Copy', as per Section 3 of the GFDL.
The source of this document is available from
\\url{https://git.sv.gnu.org/cgit/guix/maintenance.git}.
}}
")))
;; Customize the LaTeX engine for the 'acmart' document class: set the
;; document class and install writers for authors, ACM categories, images,
;; small program listings, and figures.
(let ((latex (find-engine 'latex)))
  (engine-custom-set! latex 'documentclass
                      "\\documentclass[sigplan]{acmart}")

  ;; The document body emits "\maketitle" itself, so turn off the engine's.
  (engine-custom-set! latex 'maketitle #f)

  ;; Authors: one \author{...}\affiliation{...} group per author node.  The
  ;; body may be a single node or a list of nodes.
  (markup-writer '&latex-author latex
     :action (lambda (n e)
               (let ((body (markup-body n)))
                 (for-each (lambda (a)
                             (display "\\author{")
                             (output (markup-option a :name) e)
                             (display "}\n\\affiliation{\n")
                             (display " \\institution{")
                             (output (markup-option a :affiliation) e)
                             (display "}\n \\city{")
                             (output (markup-option a :address) e)
                             (display "}}\n"))
                           (if (pair? body) body (list body))))))

  ;; ACM category: "\ccsdesc[INDEX]{SECTION}".  The :subsection option is
  ;; accepted but not written.
  (markup-writer '&acm-category latex
     :options '(:index :section :subsection)
     :action (lambda (n e)
               (display "\\ccsdesc[")
               (display (markup-option n :index))
               (display "]")
               (display "{")
               (display (markup-option n :section))
               (display "}\n")))

  ;; Images: :width is a fraction of \textwidth and defaults to 0.5.  The
  ;; :url, :height and :zoom options are accepted but not used.
  (markup-writer 'image latex
     :options '(:file :url :width :height :zoom)
     :action (lambda (n e)
               (format #t "\n\\includegraphics[width=~a\\textwidth]{~a}\n"
                       (or (markup-option n :width) 0.5)
                       (markup-option n :file))))

  ;; Program listings of class "small" are wrapped in a 'scriptsize'
  ;; environment.
  (markup-writer 'prog latex
     :class "small"
     :options '(:line :mark)
     :before "\n\n\n\\begin{scriptsize}\n"
     :action (lambda (n e)
               ;; Delegate actual work to the "real" 'prog'.
               (output (prog :line (markup-option n :line)
                             :mark (markup-option n :mark)
                             (node-body n))
                       e))
     :after "\n\\end{scriptsize}\n")

  ;; Figures: the body is typeset in 'scriptsize', followed by a caption that
  ;; carries a label derived from the figure's identifier.  The :multicolumns
  ;; option is accepted but has no effect on the output.
  (markup-writer 'figure latex
     :options '(:legend :number :multicolumns)
     :action (lambda (n e)
               (let ((ident  (markup-ident n))
                     (legend (markup-option n :legend)))
                 (display "\\begin{figure}[ht]\n\\begin{scriptsize}\n")
                 (output (markup-body n) e)
                 (display "\n\\end{scriptsize}\n")
                 (format #t "\\caption{\\label{~a}"
                         (string-canonicalize ident))
                 (output legend e)
                 (display "}\\end{figure}\n")))))
(define (acmart-abstract . body)
  ;; Wrap BODY, the rest-argument list of markup nodes, in a LaTeX 'abstract'
  ;; environment; BODY is passed as the argument for the "$1" placeholder of
  ;; the '!latex' template.
  (!latex "\\begin{abstract}\n$1\n\\end{abstract}\n" body))
(bibliography "../els-2013/guix.sbib")
(bibliography "../reppar-2015/reppar.sbib")
(bibliography "security.sbib")
(document :title [Secure Software Supply Chain with GNU Guix]
;; :author (list (author :name "Ludovic Courtès"
;; :affiliation "Inria"
;; :address (list "Bordeaux, France")))
(acmart-abstract
(p [GNU Guix blah blah FIXME.]))
;; See <http://dl.acm.org/ccs/ccs_flat.cfm>.
(!latex "\\input{categories.tex}\n")
(acm-keywords [Software deployment, Git, ])
(!latex "\\maketitle\n")
(chapter :title [Introduction]
(p [Package managers and related software deployment tools are in
a key position when it comes to securing the “software supply
chain”—they take source code fresh from repositories and provide users
with ready-to-use binaries. Between source code repositories and
binaries users run, many things can go wrong: binaries can be
compromised on their way to the user's machine, on the provider's
servers, or possibly indirectly ,(it [via]) toolchain compromise
,(ref :bib "thompson1984:trusting-trust"). Every software installation
and every upgrade can put users at risk.])
(p [GNU Guix is a set of software deployment tools and a
standalone GNU/Linux distribution; it includes a package manager similar
in spirit to Debian's apt or Fedora's yum. Unlike those, Guix builds
upon the ,(emph [functional deployment model]) pioneered by Nix,(ref
:bib "dolstra2004:nix"), a foundation for reproducible deployment,
reproducible builds, and provenance tracking. Guix is essentially a
“source-based” deployment tool: the ,(emph [model]) is that of a system
where every piece of software is built from source, and pre-built
binaries are viewed as a mere optimization and not as a central aspect
of its design.])
(p [This paper describes the design and implementation of Guix's
secure update mechanism. ,(numref :text [Section] :ident "background")
gives background information necessary to understand the overall
deployment model of Guix. FIXME: complete]))
(chapter :title [Background] :ident "background"
(p [Users of free operating systems such as GNU/Linux are used to
,(emph [package managers]) like Debian's ,(tt [apt]), which allow them
to install, upgrade, and remove software from a large collection of free
software packages. GNU Guix,(footnote (url "https://guix.gnu.org")) is
primarily a ,(emph [functional]) package manager that builds upon the
ideas developed for Nix by Dolstra ,(it [et al.]) ,(ref :bib
'(dolstra2004:nix courtes2013:functional)). The term “functional” means
that software build processes are considered as pure functions: given a
set of inputs (compiler, libraries, build scripts, and so on), a
package's build function is assumed to always produce the same result.
Build results are stored in an immutable persistent data structure, the
,(emph [store]), implemented as a single directory, ,(tt [/gnu/store]).
Each entry in ,(tt [/gnu/store]) has a file name composed of the hash of
all the build inputs used to produce it, followed by a symbolic name.
For example, ,(tt [/gnu/store/yr9rk90jf…-gcc-10.3.0]) identifies a
specific build of GCC 10.3. A variant of GCC 10.3, for instance one
using different build options or different dependencies, would get a
different hash. Thus, each store file name uniquely identifies build
results, and build processes are ,(emph [referentially transparent]).])
(p [Guix, like Nix and unlike Debian or Fedora, is essentially a
,(emph [source-based distribution]): Guix package definitions describe
how to build packages from source. When running a command such as ,(tt
[guix install gcc]), Guix proceeds as if it were to build GCC from
source. As an optimization, users can enable fetching pre-built
binaries—called ,(emph [substitutes]) because they are substitutes for a
local build. In that case, instead of building locally, Guix asks one
or more servers for substitutes. In the example above, it would ask
specifically for substitutes for ,(tt
[/gnu/store/yr9rk90jf…-gcc-10.3.0]), which unambiguously identifies the
desired build output. Substitutes are cryptographically signed by the
server and Guix rejects substitutes not signed by one of the keys the
user authorized.])
(p [To maximize chances that build processes actually look like
pure functions, they are spawned in isolated build environments—Linux
,(emph [containers])—ensuring that only explicitly declared inputs are
visible to the build process. This, in turn, helps achieve bit-for-bit
,(emph [reproducible builds]), which are critical from a security
standpoint ,(ref :bib 'lamb2021:reproducible). Reproducible builds
enable users and developers to verify that a binary matches a given
piece of source code: anyone can rebuild the package and ensure they
obtain the same binary, bit for bit. The explicit and unambiguous
mapping from source to binary that the functional deployment model
provides makes verification clear and easy. For example, the command
,(tt [guix build --check hello]) rebuilds the ,(tt [hello]) package
locally and prints an error if the build result differs from that
already available. Likewise, ,(tt [guix challenge hello]) compares
binaries of the ,(tt [hello]) package available locally with those
provided by one or several substitute servers.])
(p [Are reproducible builds enough to guarantee that one can
verify source-to-binary mappings? In his Turing Award acceptance speech
,(ref :bib 'thompson1984:trusting-trust), Ken Thompson described a
scenario whereby a legitimate-looking build process would produce a
malicious binary—if that build process is reproducible, it just
reproducibly builds a malicious binary. The attack Thompson described,
often referred to as a “Trusting Trust attack”, consists in targeting
the compilation toolchain, typically by modifying the compiler such that
it emits malicious code when it recognizes specific patterns of source
code. This attack can be undetectable. What makes such attacks
possible is that users and distributions rely on opaque binaries at some
level to “bootstrap” the entire package dependency graph.])
(p [In 2017, Jan Nieuwenhuizen ,(it [et al.]) sought to address
this forty-year-old problem at its root: by ensuring no opaque binaries
appear in the package dependency graph—no less ,(ref :bib
'janneke:mes-web). To that end, Nieuwenhuizen developed GNU Mes, a
small interpreter of the Scheme language written in C, capable enough to
run MesCC, a non-optimizing C compiler. That, coupled with other heroic
efforts, led to a drastic reduction of the size of the opaque binaries
at the root of the Guix package graph, well below what had been achieved
so far ,(ref :bib 'janneke2020:bootstrap). While many considered it
unrealistic a few years earlier, the initial goal of building ,(emph
[everything]) from source, starting from a small core and incrementally
building more complex pieces of software, is now within reach. This has
the potential to thwart an entire class of software supply chain attacks
that has been known but left unaddressed for forty years.]))
(chapter :title [Rationale] :ident "rationale"
(p [As we have seen, Guix is conceptually a source-based
distribution. It addresses common classes of software supply chain
attacks in two ways: by reducing and eventually removing reliance on
opaque binaries at the root of its dependency graph, and by affording
reproducible builds. Guix users can choose to obtain pre-built binaries
for software they install, and reproducible builds guarantee that anyone
can verify that providers of those binaries are not distributing modified
or malicious versions.])
(p [The security issue that the remainder of this paper focuses on
is that of ,(emph [distributing updates securely]): how can users know
that updates to Guix and its package collection that they fetch are
genuine? The problem of securing software updates is often viewed
through the lens of binary distributions such as Debian, where the main
assets to be protected are the binaries themselves. Guix being a
source-based distribution, the question has to be approached from a
different angle.])
(p [Guix consists of source code for the tools as well as package
definitions that make up the GNU/Linux distribution. All this code is
maintained under version control in a Git repository. To update Guix
and its package collection, users run ,(tt [guix pull])—the equivalent
of ,(tt [apt update]) in Debian. When users run ,(tt [guix pull]), what
happens behind the scenes is equivalent to ,(tt [git clone]) or ,(tt [git
pull]). There are many ways this can go wrong. An attacker can trick
the user into pulling code from an alternate repository that contains
malicious code or definitions for backdoored packages. This is made
more difficult by the fact that code is fetched over HTTPS from Savannah
by default. If Savannah is compromised as happened in 2010,(footnote
[https://www.fsf.org/blogs/sysadmin/savannah-and-www.gnu.org-downtime]),
an attacker can push code to the Guix repository, which everyone would
pull. The change might even go unnoticed and remain in the repository
forever. An attacker with access to Savannah can also reset the main
branch to an earlier revision, leading users to install outdated
software with known vulnerabilities—a ,(emph [downgrade attack]). These
are the kinds of attacks we want to protect against.]))
(chapter :title [Authenticating Git checkouts]
:ident "authenticating"
(p [If we take a step back, the problem we are trying to solve is
not specific to Guix and to software deployment tools: it's about
,(emph [authenticating Git checkouts]). By that, we mean that when ,(tt [guix pull])
obtains code from Git, it should be able to tell that all the commits it
fetched were pushed by authorized developers of the project. We are
really looking at individual commits, not tags, because users can choose
to pull arbitrary points in the commit history of Guix and third-party
channels.])
(p [Checkout authentication requires cryptographically signed
commits,(footnote
[https://git-scm.com/book/en/v2/Git-Tools-Signing-Your-Work]). By
signing a commit, a Guix developer asserts that they are the one who
made the commit; they may be its author, or they may be the person who
applied somebody else's changes after review. It also requires a notion
of authorization: we don't simply want commits to have a valid
signature, we want them to be signed by an authorized key. The set of
authorized keys changes over time as people join and leave the project.])
(p [To implement that, we came up with the following mechanism and rule:
,(enumerate
(item [The repository contains a ,(tt [.guix-authorizations])
file
that lists the OpenPGP key fingerprints of authorized committers.])
(item [A commit is considered authentic if and only if it is signed by one
of the keys listed in the ,(tt [.guix-authorizations]) file of each of
its parents. This is the ,(emph [authorization invariant]).]))
(Remember that Git commits form a directed acyclic graph (DAG) where
each commit can have zero or more parents; merge commits have two parent
commits, for instance. Do not miss ,(it [Git for Computer
Scientists]),(footnote [https://eagain.net/articles/git-for-computer-scientists/])
for a pedagogical overview!)])
(figure
:legend [Graph of commits and the associated authorizations.]
:ident "fig-commits"
(image :file "images/commit-graph.pdf"))
(p [Let's take an example to illustrate. In ,(numref :text
[Figure] :ident "fig-commits"), each box is a commit, and each arrow is
a parent relationship.])
(p [This figure shows two lines of development: the orange line may be the
main development branch, while the purple line may correspond to a
feature branch that was eventually merged in commit ,(it [F]). ,(it [F]) is a merge
commit, so it has two parents: ,(it [D]) and ,(it [E]).])
(p [Labels next to boxes show who's in ,(tt [.guix-authorizations]): for commit A,
only Alice is an authorized committer, and for all the other commits,
both Bob and Alice are authorized committers. For each commit, we see
that the authorization invariant holds; for example:
,(itemize
(item [commit ,(it [B]) was made by Alice, who was the only authorized committer
in its parent, commit ,(it [A]);])
(item [commit ,(it [C]) was made by Bob, who was among the authorized committers
as of commit ,(it [B]);])
(item [commit ,(it [F]) was made by Alice, who was among the authorized
committers of both parents, commits ,(it [D]) and ,(it [E]).]))
The authorization invariant has the nice property that it's simple to
state, and it's simple to check and enforce. This is what ,(tt [guix pull])
implements. If your current Guix, as returned by ,(tt [guix describe]), is at
commit ,(it [A]) and you want to pull to commit ,(it [F]), ,(tt [guix pull]) traverses all
these commits and checks the authorization invariant.])
(p [Once a commit has been authenticated, all the commits in its
transitive closure are known to be already authenticated. ,(tt [guix pull])
keeps a local cache of the commits it has previously authenticated,
which allows it to traverse only new commits. For instance, if you are
at commit ,(it [F]) and later update to a descendant of ,(it [F]), authentication
starts at ,(it [F]).])
(p [Since ,(tt [.guix-authorizations]) is a regular file under version
control, granting or revoking commit authorization does not require
special support. In the example above, commit ,(it [B]) is an authorized
commit by Alice that adds Bob's key to ,(tt [.guix-authorizations]).
Revocation is similar: any authorized committer can remove entries from
,(tt [.guix-authorizations]). Key rotation can be handled similarly: a
committer can remove their former key and add their new key in a single
commit, signed by the former key.])
(p [The authorization invariant satisfies our needs for Guix. It
has one downside: it prevents pull-request-style workflows. Indeed,
merging the branch of a contributor not listed in ,(tt [.guix-authorizations])
would break the authorization invariant. It's a good tradeoff for Guix
because our workflow relies on patches carved into stone tablets
,(footnote [https://lwn.net/Articles/702177/]) (patch tracker,(footnote
[https://issues.guix.gnu.org/])), but it's not suitable for every
project out there.]))
(chapter :title [Bootstrapping] :ident "bootstrapping"
(p [The attentive reader may have noticed that something's missing
from the explanation above: what do we do about commit ,(it [A]) in the
example above? In other words, which commit do we pick as the first one
where we can start verifying the authorization invariant?])
(figure
:legend [The introductory commit in a graph.]
:ident "fig-commit-graph-intro"
(image :file "images/commit-graph-intro.pdf"))
(p [We solve this bootstrapping issue by defining ,(emph [channel introductions]).
Previously, one would identify a channel simply by its URL. Now, when
introducing a channel to users, one needs to provide an additional piece
of information: the first commit where the authorization invariant
holds, and the fingerprint of the OpenPGP key used to sign that commit
(it's not strictly necessary but provides an additional check).])
(p [Consider the commit graph in ,(numref :text [Figure] :ident
"fig-commit-graph-intro"). In this figure, ,(it [B]) is the introduction commit. Its
ancestors, such as ,(it [A]), are considered authentic. To authenticate ,(it [C]),
,(it [D]), ,(it [E]), and ,(it [F]), we check the authorization invariant.])
(p [As always when it comes to establishing trust, distributing
channel introductions is very sensitive. The introduction of the
official ,(tt [guix]) channel is built into Guix. Users obtain it when they
install Guix the first time; hopefully they verify the signature on the
Guix tarball or ISO image, as noted in the installation instructions,
which reduces chances of getting the “wrong” Guix, but it is still very
much trust-on-first-use (TOFU).])
(figure
:legend [Specification of a channel along with its ,(emph [introduction]).]
:ident "fig-channel-spec"
(prog :line #f
(source :language scheme [
(channel
(name 'my-channel)
(url "https://example.org/my-channel.git")
(introduction
(make-channel-introduction
"6f0d8cc0d88abb59c324b2990bfee2876016bb86"
(openpgp-fingerprint
"CABB A931 C0FF EEC6 900D 0CFB 090B 1199 3D9A EBB5"))))])))
(p [Guix supports third-party channels providing extra software
packages. To use a third-party channel, one needs to add it to the ,(tt
[~/.config/guix/channels.scm]) configuration file, which contains a
declarative Scheme code snippet listing the desired channels. Authors
of third-party channels can also benefit from the channel authentication
mechanism: they need to sign commits, to include a ,(tt
[.guix-authorizations]) file and the list of relevant OpenPGP keys, and
to advertise the channel's introduction. Users then have to provide the
channel's introduction in their ,(tt [channels.scm]) file, as shown in
,(numref :text [Figure] :ident "fig-channel-spec").])
(p [The ,(tt [guix describe]) command now prints the introduction if
there's one. That way, one can share their channel configuration,
including introductions, without having to be an expert.])
(p [Channel introductions also solve another problem: forks.
Respecting the authorization invariant “forever” would effectively
prevent “unauthorized” forks—forks made by someone who's not in
,(tt [.guix-authorizations]). Someone publishing a fork simply needs to emit
a new introduction for their fork, pointing to a different starting
commit.])
(p [Last, channel introductions give a ,(emph [point of reference]): if an
attacker manipulates branch heads on Savannah to have them point to
unrelated commits (such as commits on an orphan branch that do not share
any history with the “official” branches), authentication will
necessarily fail as it stumbles upon the first unauthorized commit made
by the attacker. In the figure above, the red branch with commits ,(it [G])
and ,(it [H]) cannot be authenticated because it starts from ,(it [A]), which lacks
,(tt [.guix-authorizations]) and thus fails the authorization invariant.])
(p [That's all for authentication! I'm glad you read this far.
At this point you can take a break or continue with the next section on
how ,(tt [guix pull]) prevents downgrade attacks.]))
(chapter :title [Downgrade attacks] :ident "downgrade"
(p [An important threat for software deployment tools is
,(emph [downgrade]) or ,(emph [roll-back]) attacks. The attack consists in tricking
users into installing older, known-vulnerable software packages, which
in turn may offer new ways to break into their system. This is not
strictly related to the authentication issue we've been discussing,
except that it's another important issue in this area that we took the
opportunity to address.])
(p [Guix saves provenance info for itself: ,(tt [guix describe]) prints that
information, essentially the Git commits of the channels used during
,(tt [guix pull]):]
(prog :class "small" :line #f [
$ guix describe
Generation 149 Jun 17 2020 20:00:14 (current)
guix 8b1f7c0
repository URL: https://git.savannah.gnu.org/git/guix.git
branch: master
commit: 8b1f7c03d239ca703b56f2a6e5f228c79bc1857e
])
[Thus, ,(tt [guix pull]), once it has retrieved the latest commit of the
selected branch, can verify that it is doing a ,(emph [fast-forward update]) in
Git parlance—just like ,(tt [git pull]) does, but compared to the
previously-deployed Guix. A fast-forward update is when the new commit
is a descendant of the current commit. Going back to the figure above,
going from commit ,(it [A]) to commit ,(it [F]) is a fast-forward update, but going
from ,(it [F]) to ,(it [A]) or from ,(it [D]) to ,(it [E]) is not.])
(p [Not doing a fast-forward update would mean that the user is deploying an
older version of the Guix currently used, or deploying an unrelated
version from another branch. In both cases, the user is at risk of
ending up installing older, vulnerable software.])
(p [By default ,(tt [guix pull]) now errors out on non-fast-forward updates,
thereby protecting from roll-backs. Users who understand the
risks can override that by passing
,(tt [--allow-downgrades]).]))
(chapter :title [Mirrors and the risk of staleness]
:ident "mirrors"
(p [Authentication and roll-back prevention allow users to safely
refer to mirrors of the Git repository. If the official repository at
,(tt [git.savannah.gnu.org]) is down, one can still update by fetching
from a mirror, for instance with:]
(prog :line #f :class "small" [
guix pull --url=https://github.com/guix-mirror/guix
])
[If the repository at this URL is behind what the user already deployed,
or if it's not a genuine mirror, ,(tt [guix pull]) will abort. In other
cases, it will proceed.])
(p [Unfortunately, there is no way to answer the general question
“,(it [is]) X ,(it [the latest commit of branch]) B ,(it [?])”.
Rollback detection prevents just that, rollbacks, but there is no
mechanism in place to tell whether a given mirror is stale. To mitigate
that, channel authors can specify, in the repository, the channel's
,(emph [primary URL]). This piece of information lives in the
,(tt [.guix-channel]) file, in the repository, so it's authenticated. ,(tt
[guix pull]) uses it to print a warning when the user pulls from a
mirror:]
(prog :line #f :class "small" [
$ guix pull --url=https://github.com/guix-mirror/guix
Updating channel 'guix' from 'https://github.com/guix-mirror/guix'...
Authenticating channel 'guix', commits 9edb3f6 to 3e51f9e...
guix pull: warning: pulled channel 'guix' from a mirror of
https://git.savannah.gnu.org/git/guix.git, which might be stale
Building from this channel:
guix https://github.com/guix-mirror/guix 3e51f9e
]))
(p [So far we talked about mechanics in a rather abstract way. That might
satisfy the graph theorist or the Git geek in you, but if you are up for
a quick tour of the implementation, the next section is for you!]))
(chapter :title [Implementation])
(chapter :title [SHA-1] :ident "sha1"
(p [We can't really discuss Git commit signing without mentioning
SHA-1. The venerable cryptographic hash function is approaching end of
life, as evidenced by recent breakthroughs ,(ref :bib
'(stevens2017:collision leurent2020:shambles)). Signing a Git commit
boils down to signing a SHA-1 hash, because all objects in the Git store
are identified by their SHA-1 hash.])
(p [Git now relies on a collision attack detection library ,(ref
:bib 'stevens2017:detection) to mitigate practical attacks.
Furthermore, the Git project is planning a hash function
transition,(footnote
[https://git-scm.com/docs/hash-function-transition/]) to address the
problem.])
(p [Some projects such as Bitcoin Core choose to not rely on SHA-1
at all. Instead, for the commits they sign, they include in the commit
log the SHA512 hash of the tree, which the verification scripts
check,(footnote
[https://github.com/bitcoin/bitcoin/tree/master/contrib/verify-commits]).
Computing a tree hash ,(emph [for each commit]) in Guix would probably be
prohibitively costly. For now, for lack of a better solution, we rely
on Git's collision attack detection and look forward to a hash function
transition.])
(p [As for SHA-1 in an OpenPGP context: our authentication code
rejects SHA-1 OpenPGP signatures, as recommended.]))
(chapter :title [Related Work]
:ident "related"
(p [A lot of work has gone into securing the software supply chain, often in
the context of binary distros, sometimes in a more general context; more
recent work also looks into Git authentication and related issues.
This section attempts to summarize how Guix relates to similar work that
we are aware of in these two areas. More detailed discussions can be
found in the issue tracker,(footnote [https://issues.guix.gnu.org/22883]).])
(p [The Update Framework,(footnote
[https://theupdateframework.io/]) (TUF) is a reference for secure update
systems, with a well-structured spec,(footnote
[https://github.com/theupdateframework/specification/blob/master/tuf-spec.md#the-update-framework-specification])
and a number of implementations. TUF is a great source of inspiration
to think about this problem space. Many of its goals are shared by
Guix. Not all the attacks it aims to protect against (Section 1.5.2 of
the spec) are addressed by what's presented in this paper: ,(emph [indefinite
freeze attacks]), where updates never become available, are not addressed
,(emph [per se]) (though easily observable), and ,(emph [slow retrieval attacks]) aren't
addressed either. The notion of ,(emph [role]) is also something currently
missing from the Guix authentication model, where any authorized
committer can touch any files, though the model and
,(tt [.guix-authorizations]) format leave room for such an extension.])
(p [However, both in its goals and system descriptions, TUF is
biased towards systems that distribute binaries as plain files with
associated meta-data. That creates a fundamental impedance mismatch.
As an example, attacks such as ,(emph [fast-forward attacks]) or ,(emph
[mix-and-match attacks]) don't apply in the context of Guix; likewise,
the ,(emph [repository]) depicted in Section 3 of the spec has little in
common with a Git repository.])
(p [Developers of OPAM, the OCaml package manager, adapted TUF for
use with their Git-based package repository,(footnote
[http://opam.ocaml.org/blog/Signing-the-opam-repository/]), later
updated to write Conex,(footnote [https://github.com/hannesm/conex]), a
separate tool to authenticate OPAM repositories. OPAM is interesting
because like Guix it's a source distro and its package
repository,(footnote [https://github.com/ocaml/opam-repository]) is a
Git repository containing “build recipes”. To date, it appears that
,(tt [opam update]) itself does not authenticate repositories though; it's up
to users or developers to run Conex.])
(p [Another interesting approach is to focus on the impact of
malicious modifications to Git repository meta-data ,(ref :bib
"torresarias2016:omitting"). An attacker with access to the repository
can modify, for instance, branch references, to cause a rollback attack
or a “teleport” attack, causing users to pull an older commit or an
unrelated commit. As written above, ,(tt [guix pull]) would detect such
attacks. However, ,(tt [guix pull]) would fail to detect cases where
metadata modification does not yield a rollback or teleport, yet gives
users a different view than the intended one—for instance, a user is
directed to an authentic but different branch rather than the intended
one. The “secure push” operation and the associated ,(emph [reference
state log]) (RSL) the authors propose would be an improvement.]))
(chapter :title [Conclusion and outlook]
:ident "conclusion"
(p [Guix now has a mechanism that allows it to authenticate
updates. If you've run ,(tt [guix pull]) recently, perhaps you've noticed
additional output and a progress bar as new commits are being
authenticated. Apart from that, the switch has been completely
transparent. The authentication mechanism is built around the commit
graph of Git; in fact, it's a mechanism to ,(emph [authenticate Git checkouts])
and in that sense it is not tied to Guix and its application domain. It
is available not only for the main ,(tt [guix]) channel, but also for
third-party channels.])
(p [To bootstrap trust, we added the notion of ,(emph [channel
introductions]). These are now visible in the user interface, in
particular in the output of ,(tt [guix describe]) and in the configuration
file of ,(tt [guix pull]) and ,(tt [guix time-machine]). While channel
configuration remains a few lines of code that users typically paste,
this extra bit of configuration might be intimidating. It certainly
gives an incentive to provide a command-line interface to manage the
user's list of channels: ,(tt [guix channel add]), etc.])
(p [The solution here is built around the assumption that Guix is
fundamentally a source-based distribution, and is thus completely
orthogonal to the public key infrastructure (PKI) Guix uses for the
signature of substitutes. Yet, the substitute PKI could probably
benefit from the fact that we now have a secure update mechanism for the
Guix source code: since ,(tt [guix pull]) can securely retrieve a new
substitute signing key, perhaps it could somehow handle substitute
signing key revocation and delegation automatically? Related to that,
channels could perhaps advertise a substitute URL and its signing key,
possibly allowing users to register those when they first pull from the
channel. All this requires more thought, but it looks like there are
new opportunities here.]))
;; in-toto: https://www.usenix.org/conference/usenixsecurity19/presentation/torres-arias
(!latex "\n\\newpage\n\\balance\n")
(references))
;;; Local Variables:
;;; coding: utf-8
;;; ispell-local-dictionary: "american"
;;; compile-command: "make supply-chain.pdf"
;;; comment-start: ";;"
;;; End: