doc: Add CiSE article.

* doc/cise-2022: New directory.
* doc/programming-2022/security.sbib (lamb2021:reproducible):
Comma-separate authors.
This commit is contained in:
Ludovic Courtès 2022-03-05 14:48:33 +01:00
parent 0dfe6a80ff
commit 455acc4f2e
No known key found for this signature in database
GPG Key ID: 090B11993D9AEBB5
6 changed files with 750 additions and 1 deletions

21
doc/cise-2022/GNUmakefile Normal file
View File

@ -0,0 +1,21 @@
# Build the article: Skribilo source (.skb) -> LaTeX (.tex) -> PDF.
# Tools can be overridden from the command line, e.g. 'make RUBBER=...'.
SKRIBILO := skribilo
PDFLATEX := pdflatex
RUBBER := rubber
DOT := dot
DOT-OPTS := -Gratio=1.2 -Gwidth=15cm

.DEFAULT_GOAL := cpu-tuning.pdf

# Remove a target whose recipe failed, so that a truncated file is never
# mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# Files read while the Skribilo source is converted to LaTeX: the TikZ
# figure is spliced in with 'call-with-input-file' and the local
# bibliography is loaded with 'bibliography'.
ILLUSTRATIONS := \
  images/cpu-simd-extensions.tex

# Extra prerequisites only; the recipe comes from the '%.tex: %.skb'
# pattern rule below.  They are attached to the .tex file because that
# is the step that reads them.
cpu-tuning.tex: $(ILLUSTRATIONS) hpc.sbib

# '$$PWD' reaches the shell as '$PWD'.
%.pdf: %.tex
	$(RUBBER) --pdf -I $$PWD "$<"

%.tex: %.skb
	$(SKRIBILO) -t latex -o "$@" "$<"

# Write to a temporary file first so that an interrupted 'dot' run does
# not leave a partial PDF under the target name.
%.pdf: %.dot
	$(DOT) -Tpdf $(DOT-OPTS) < "$<" > "$@.tmp"
	mv "$@.tmp" "$@"

View File

@ -0,0 +1,601 @@
(use-modules (skribilo engine)
(skribilo engine latex)
(skribilo ast)
(skribilo writer)
(skribilo output)
(skribilo utils strings)
(skribilo lib)
(skribilo evaluator)
(skribilo biblio author)
(skribilo source)
(skribilo source lisp)
(skribilo source parameters)
(ice-9 match)
(rnrs io ports))
;; Em dash: the "&mdash;" entity when the engine format is "html", the
;; literal "---" for every other engine.  The choice is made at resolve
;; time, once the engine is known.
(define (---)
  (resolve (lambda (node engine env)
             (! (if (engine-format? "html" engine)
                    "&mdash;"
                    "---")))))
;; En dash: the "&ndash;" entity when the engine format is "html", the
;; literal "--" for every other engine.
(define (--)
  (resolve (lambda (node engine env)
             (! (if (engine-format? "html" engine)
                    "&ndash;"
                    "--")))))
;; Two hyphens.  With the "latex" engine format each hyphen is emitted in
;; its own brace group ("{-}{-}"), presumably so that LaTeX does not fuse
;; them into an en dash; other engines get the plain string "--".
(define (dash-dash)
  (resolve (lambda (node engine env)
             (cond ((engine-format? "latex" engine)
                    (! "{-}{-}"))
                   (else
                    "--")))))
;; Hyperlink to URL whose visible text is URL itself, set in teletype.
(define (url url)
  (let ((label (tt url)))
    (ref :text label :url url)))
;; The "=>" symbol, looked up with 'symbol'.
;; NOTE(review): binding '=>' at top level may interfere with '=>'
;; clauses of 'cond' in this file -- none are used here, but confirm
;; before adding one.
(define =>
  (lambda ()
    (symbol "=>")))
;; XXX: Terrible hack to turn hyphens into hyphenation points in 'tt'.
;; Replacement table for teletype text: two extra entries, which append
;; "\-" after every '-' and every 'h', are put in front of the table of
;; the same name taken from (skribilo engine latex).  '@@' is used to
;; reach that binding, presumably because the module does not export it.
(define latex-tt-encoding
`((#\- "-\\-")
(#\h "h\\-") ;“authorizations”, “authenticate”
,@(@@ (skribilo engine latex) latex-tt-encoding)))
;; Replace the LaTeX writer for 'tt': the body is rendered through a
;; throw-away engine that delegates to the current one and shares its
;; customs and symbol table, but filters strings with the table above.
(markup-writer 'tt (find-engine 'latex)
:before "{\\texttt{"
:action (lambda (n e)
(let ((ne (make-engine
(gensym "latex")
:delegate e
:filter (make-string-replace latex-tt-encoding)
:custom (engine-customs e)
:symbol-table (engine-symbol-table e))))
(output (markup-body n) ne)))
:after "}}")
;; Tweaks to the stock LaTeX engine: image format, hyperref, and extra
;; preamble material.
(let ((latex (find-engine 'latex)))
  ;; For pdflatex.
  (engine-custom-set! latex 'image-format '("pdf"))

  ;; Avoid "option clash" with acmart.
  ;; NOTE(review): the document class set further down in this file is
  ;; IEEEcsmag, not acmart -- confirm this is still needed.
  (engine-custom-set! latex 'hyperref #f)

  ;; Append our own packages, colors and fixes to the existing value of
  ;; the 'usepackage' custom.
  ;; See <https://en.wikibooks.org/wiki/LaTeX/Labels_and_Cross-referencing>
  ;; and <http://tug.org/pipermail/texhax/2010-September/015596.html>.
  (let ((preamble (engine-custom latex 'usepackage)))
    (engine-custom-set!
     latex 'usepackage
     (string-append
      preamble "\n"
      "\\usepackage{inconsolata}\n"
      "\\usepackage{tikz}\n"
      "\\usetikzlibrary{arrows,shapes,shadows}\n"
      "\\definecolor{guixorange1}{RGB}{243,154,38} % guixorange P\n"
      "\\definecolor{guixblue2}{RGB}{10,50,80} % guixblue S\n"
      "\\definecolor{guixred2}{RGB}{230,68,57} % red S\n"
      "\\definecolor{guixdarkgrey}{RGB}{46,47,55} % guixdarkgrey S\n"

      ;; Trick so that ‘…’ is properly typeset inside teletype text.
      "\\DeclareUnicodeCharacter{2026}{\\textrm{\\ldots}}\n"

      ;; Improve hyphenation.
      "\\hyphenation{Open-PGP}\n"))))
;; Adapt the LaTeX engine to the IEEEcsmag class: select the document
;; class, set the 'maketitle' custom to #f (the document body emits
;; \maketitle itself through '!latex'), and override several writers.
(let ((latex (find-engine 'latex)))
(engine-custom-set! latex 'documentclass
"\\documentclass{IEEEcsmag}")
(engine-custom-set! latex 'maketitle #f)
;; Authors: for each author node emit "\author{{NAME}}" followed by an
;; "\affil{ AFFILIATION }" block.  A body that is not a pair (a single
;; author) is wrapped in a list first.
(markup-writer '&latex-author latex
:action (lambda (n e)
(let ((body (markup-body n)))
(for-each (lambda (a)
(display "\\author{{")
(output (markup-option a :name) e)
(display "}}\n\\affil{\n")
(output (markup-option a :affiliation) e)
(display "}\n\n"))
(if (pair? body) body (list body))))))
;; Images: \includegraphics sized as a fraction of \textwidth, taken
;; from :width and defaulting to 0.5.  :url, :height and :zoom are
;; accepted as options but not used by this writer.
(markup-writer 'image latex
:options '(:file :url :width :height :zoom)
:action (lambda (n e)
(format #t "\n\\includegraphics[width=~a\\textwidth]{~a}\n"
(or (markup-option n :width) 0.5)
(markup-option n :file))))
;; 'prog' nodes of class "small": 3mm of vertical space, then the
;; listing inside a 'footnotesize' environment.
(markup-writer 'prog latex
:class "small"
:options '(:line :mark)
:before "\n\n\\vspace{3mm}\n\\begin{footnotesize}\n"
:action (lambda (n e)
;; Delegate actual work to the "real" 'prog'.
;; The new node carries :line and :mark but no :class.
(output (prog :line (markup-option n :line)
:mark (markup-option n :mark)
(node-body n))
e))
:after "\n\\end{footnotesize}\n")
;; Figures: a "[ht]" float whose body is set in 'scriptsize', followed
;; by a caption that carries a \label derived from the node identifier.
;; NOTE(review): 'mc' (:multicolumns) is bound but never used below.
(markup-writer 'figure latex
:options '(:legend :number :multicolumns)
:action (lambda (n e)
(let ((ident (markup-ident n))
(legend (markup-option n :legend))
(mc (markup-option n :multicolumns)))
(display "\\begin{figure}[ht]\n\\begin{scriptsize}\n")
(output (markup-body n) e)
(display "\n\\end{scriptsize}\n")
(format #t "\\caption{\\label{~a}"
(string-canonicalize ident))
(output legend e)
(display "}\\end{figure}\n"))))
;; Bibliography entry authors: a string body goes through
;; 'abbreviate-first-names' before being evaluated; any other body is
;; evaluated as is.
;; NOTE(review): unlike the writers above, no engine argument is passed
;; here -- confirm which engine this writer ends up registered for.
(markup-writer '&bib-entry-author
:action (lambda (n e)
(let ((names (markup-body n)))
(evaluate-document
(if (string? names)
(abbreviate-first-names
names
abbreviate-author-first-names)
names)
e)))))
(define (word-count)
  "Emit the word count."
  ;; Number of whitespace-separated tokens found in the strings that
  ;; BODY holds directly.  Nested AST nodes count as zero because the
  ;; fold below visits them on their own.
  (define (body-words body)
    (match body
      ((? string? str)
       (length (string-tokenize str)))
      ((? ast?) 0)                      ;don't double-count
      ((things ...)
       (apply + (map body-words things)))
      ;; Anything else that may sit in a body (#f, numbers, symbols...)
      ;; carries no words; without this clause 'match' would raise.
      (_ 0)))

  ;; The count is computed at resolve time, over the whole document, and
  ;; shown in bold red followed by " words.".
  (color :fg "red" (bold
    [,(resolve (lambda (n e env)
                 (ast-fold (lambda (node total)
                             (cond ((is-markup? node 'figure)
                                    ;; A figure is charged a flat 250 words.
                                    (+ total 250))
                                   ((and (is-markup? node 'chapter)
                                         (equal? (markup-option node :title)
                                                 "References"))
                                    ;; So is the "References" chapter.
                                    (+ total 250))
                                   ((container? node)
                                    ;; Containers add nothing themselves.
                                    total)
                                   ((markup? node)
                                    (+ total
                                       (body-words (markup-body node))))
                                   (else total)))
                           (+ 250 250)  ;abstract + biography
                           (ast-document n)))) words.])))
;; Wrap BODY, passed as the "$1" argument of the '!latex' template, in a
;; LaTeX 'abstract' environment.
(define abstract
  (lambda body
    (!latex "\n\\begin{abstract}\n$1\n\\end{abstract}\n\n" body)))
;; Bibliography databases, presumably the ones searched when resolving
;; the ',(ref :bib ...)' citations in the document.  The first three
;; live in sibling directories; the last one sits next to this file.
(bibliography "../els-2013/guix.sbib")
(bibliography "../reppar-2015/reppar.sbib")
(bibliography "../programming-2022/security.sbib")
(bibliography "hpc.sbib")
(document :title [Reproducibility and Performance: Why Choose?]
;;[Conciliating Performance and Reproducibility]
:author (list (author :name "Ludovic Courtès"
:affiliation "Inria"
:address "Bordeaux, France"))
(abstract [Research processes often rely on high-performance
computing (HPC), but HPC is often seen as antithetical to
“reproducibility”: one would have to choose between software that
achieves high performance, and software that can be deployed in a
reproducible fashion. However, by giving up on reproducibility we would
give up on verifiability, a foundation of the scientific process. How
can we reconcile performance and reproducibility? This article looks
at two performance-critical aspects in HPC: message passing (MPI) and
CPU micro-architecture tuning. Engineering work that has gone into
performance portability has already proved fruitful, but some areas
remain unaddressed when it comes to CPU tuning. We propose package
multi-versioning, a technique developed for GNU Guix, a tool for
reproducible software deployment, and show that it allows us to
implement CPU tuning without compromising on reproducibility and
provenance tracking.])
(!latex "\n\\maketitle\n")
(!latex "\n\\chapterinitial{Introduction.}\n")
;; (word-count)
(p [It should come as no surprise that the execution speed of programs is a
primary concern in high-performance computing (HPC). Many HPC
practitioners would tell you that, among their top concerns, is the
performance of high-speed networks used by the Message Passing Interface
(MPI) and use of the latest vectorization extensions of modern CPUs.])
(p [This article focuses on the latter: tuning code for specific CPU
micro-architectures, to reap the benefits of modern CPUs. This question
is particularly acute in the context of GNU Guix, a software deployment
tool with strong support for ,(emph [reproducible deployment]). We like
to present Guix as a key element of the reproducible research toolbox:
as more research output is produced by software, the ability to ,(emph
[verify and validate]) research results depends on the ability to ,(emph
[re-deploy and re-run]) the software. We present a recently-introduced
CPU-tuning option for Guix, the design choices we made, and how this
affects reproducibility.])
(p [But let us first consider this central question in the HPC and
scientific community: can “reproducibility” be achieved ,(emph
[without]) sacrificing performance? Our answer is a resounding “yes”,
but that deserves clarifications.])
(chapter :title [Reproducibility & High Performance]
:number #f
(p [The author remembers advice heard at the beginning of their
career in HPC—advice still given today—: that to get optimal MPI
performance, you would have to use the vendor-provided MPI library; that
to get your code to perform well on this new cluster, you would have to
recompile the complete software stack locally; that using generic,
pre-built binaries from a GNU/Linux distribution will not give you good
performance.])
(p [From a software engineering viewpoint, this looks like a sad
situation and an inefficient approach, dismissing the benefits of
automated software deployment as pioneered by Debian, Red Hat, and
others in the 90s or, more recently, as popularized with container
images. It also means doing away with reproducibility, where
“reproducibility” is to be understood in two different ways: first as
the ability to re-deploy the same software stack on another machine or
at a different point in time, and second as the ability to ,(emph [verify]) that
binaries being run match the source code—the latter is what reproducible
builds are concerned with ,(ref :bib 'lamb2021:reproducible).])
(p [But does it really have to be this way? Engineering efforts to
support ,(emph [performance portability]) suggest otherwise. A mature
MPI implementation like Open MPI, today, does achieve performance
portability: it takes advantage of high-speed networking hardware by
determining, at run-time, which drivers to use to obtain optimal
performance for the network at hand—no recompilation is needed ,(ref :bib
'courtes2019:openmpi).])
(p [Likewise, generic, pre-built binaries can and indeed often do
take advantage of modern CPUs by selecting at run-time the most
efficient implementation of performance-sensitive routines for the host
CPU ,(ref :bib 'courtes2018:prebuilt). There are situations, though, where
this does ,(emph [not]) happen; those are the ones we will focus on in the
remainder of this article.]))
(chapter :title [The Jungle of SIMD Extensions]
:number #f
(p [While major CPU architectures such as x86_64, AArch64, and
POWER9 were defined years ago, CPU vendors regularly extend them.
Extensions that matter most in HPC are vector extensions: single
instruction/multiple data (SIMD) instructions and registers. In this
area, a ,(emph [lot]) has happened on x86_64 CPUs since the baseline
instruction set architecture (ISA) was defined. As shown in ,(numref
:text [Figure] :ident "fig-simd-extensions"), Intel and AMD have been
tacking ever more powerful SIMD extensions onto their CPUs over the years,
from SSE3 to AVX-512, leading to a wealth of CPU “micro-architectures”.
This gives a high-level view, but just looking at generations of Intel
processors by their code name—from “Nehalem” to “Skylake” ,(it [via])
“Ivy Bridge”—shows an already more complicated story.])
(figure :legend [Timeline of x86_64 SIMD extensions]
:ident "fig-simd-extensions"
:multicolumns #t
(!latex (call-with-input-file "images/cpu-simd-extensions.tex"
get-string-all)))
(p [Linear algebra routines that scientific software relies on
greatly benefit from SIMD extensions. For example, on a modest Intel
Core i7 processor (of the Skylake generation), the AVX2-optimized
version of the dense matrix multiplication routines of Eigen ,(ref :bib
'guennebaud2022:eigen), built with GCC 10.3, peaks at about 40 Gflops/s,
compared to 11 Gflops/s for its baseline x86_64 version—almost four times
faster!]))
(chapter :title [Portable Performance Through Function Multi-Versioning]
:number #f
(p [How to create binaries that are portable, yet are able to get
the most out of the CPU on which they are executed? This has been an
important question for distributors of binaries. Distributions such as
Debian and CentOS provide the convenience of fast automated deployment,
thanks to pre-built binaries; asking users to either recompile part of
their software stack or give up on performance is not a reasonable
alternative.])
(p [To address this and achieve performance portability,
developers have largely adopted ,(emph [function multi-versioning])
(FMV): the implementation provides multiple versions of “hot” routines,
one for each relevant CPU micro-architecture, and picks the best one for
the host CPU at run time. Many pieces of performance-critical software
already use this technique: the C standard library (libc) contains
multiple versions of its string handling and math routines, the GMP
library for multi-precision arithmetic uses FMV, and so do software
packages ranging from cryptography libraries (Libgcrypt, Nettle) to
linear algebra (OpenBLAS, FFTW).])
(p [To make it easier for developers to adopt FMV, the GNU
compilation tool chain (GCC, the Binary Utilities, and the C Library),
which is widely used in HPC, provides helpers at different levels.
Developers can annotate relevant functions with the ,(tt [target_clones])
attribute to instruct the compiler to generate optimized versions of the
function for each selected architecture. GCC not only generates these
versions, but also generates code to choose the right function version
for the host CPU at load time, with support from the dynamic linker,
,(tt [ld.so]). That relieves developers from the need to implement
their own ad-hoc machinery. From that perspective, it would seem that
performance portability, ,(it [via]) FMV, is a solved problem.])
#;(stuff on auto-fmv commented out!
(p [To make the case for FMV, we wanted to see what it would take us to
actually add FMV support to code that would benefit from it. In the
spirit of the Clear Linux automatic FMV patch
generator (https://github.com/clearlinux/make-fmv-patch), we wrote an
automatic FMV tool for
Guix (https://gitlab.inria.fr/guix-hpc/function-multi-versioning): you
would give it a package name, and it would:])
(itemize
(item [Build the package with the ,(tt [-fopt-info-vec]) compiler flag to gather
information about vectorization opportunities and their source code
location.])
(item [Generate a patch that, for each C function with vectorization
opportunities, adds the ,(tt [target_clones])
attribute to generate a couple of vectorized versions—generic,
AVX2, and
AVX-512.])
(item [Build the package with this FMV patch.]))
(p [The tool can successfully FMV-patch a variety of packages
written in C, such as the GNU Scientific Library (FIXME
https://www.gnu.org/software/gsl), which contains plain sequential
implementations of a variety of math routines. It was an exciting
engineering experiment… but we found it to be all too often
inapplicable, for two reasons: performance-critical software already
does FMV, or it is not written in C.]))
(p [There is at least one common pattern though where FMV is not
applicable, or at least is not applied: C++ header-only libraries.
These are libraries that provide generic template code in header files;
that code is specialized ,(emph [at build time]) in software that uses
them. There is no shortage of C++ header-only math libraries providing
efficient, optimized SIMD versions of their routines: Eigen, MIPP, xsimd
and xtensor, SIMD Everywhere (SIMDe), Highway, and many more. All
these, except Highway, have in common that they do ,(emph [not]) support
FMV. Since they “just” provide headers, it is up to ,(emph [each])
package using them to figure out what to do in terms of performance
portability.])
(p [In practice though, software using these C++ header-only
libraries rarely makes provisions for performance portability. Thus,
when compiling those packages for the baseline ISA, one misses out on
all the vectorized implementations that libraries like Eigen provide.
This is a known issue in search of a solution ,(ref :bib
'larsen2021:eigen-fmv). It can have a very concrete impact on
performance since many scientific packages—the ARPACK-NG library for
solving eigenvalue problems, the Ceres solver for optimization problems,
the FEniCSx platform for solving differential equations, to name a
few—depend on Eigen.])
#;(p [Fundamentally, run-time dispatch is at odds with the all-compile-time
approach that header-only C++ template libraries are about.
Furthermore, Eigen, for example, supports fine-grain vectorization; it
may be used to operate on small matrices, as is common in computer
graphics, and in that case inlining matrix operations is key to good
performance—run-time dispatch would have to be done at a higher
level.]))
(chapter :title [Reproducible Deployment]
:number #f
(p [Distributions such as Debian and Fedora that provide pre-built
binaries miss out on SIMD optimizations of C++ header-only libraries
like Eigen because they provide binaries targeting the baseline CPU
architecture so that those binaries run on any CPU. The Spack ,(ref
:bib 'gamblin2015:spack) and EasyBuild ,(ref :bib 'geimer2014:easybuild)
package managers address that by ,(emph [rebuilding]) software on the
target computer, which allows them to instruct the compiler to optimize
for the host CPU.])
(p [Unfortunately, EasyBuild and Spack both have limited support
for reproducible deployment—they do not, in general, guarantee that you
can redeploy the same software environment on different machines, or at
different points in time. This is because they build upon software
provided by the host system—the compiler tool chain, “system” libraries,
etc.—and that foundation differs from one system to another—e.g., CentOS
might provide some version of GCC, and Ubuntu might provide another.])
(p [To avoid that, Guix builds software in ,(emph [isolated
environments]), as pioneered by Nix ,(ref :bib '(dolstra2004:nix
courtes2013:functional)), and its package collection is ,(emph
[self-contained])—it does not rely on external software packages. This
is what makes Guix builds reproducible bit-for-bit—or in other words,
,(emph [verifiable]) ,(ref :bib 'lamb2021:reproducible). Given binaries
and provenance data, anyone can independently verify the
binary/source-code correspondence.])
(p [Guix provides a command-line interface similar to that of
other package managers: ,(tt [guix install python]), for instance,
installs the Python interpreter. Package management is per-user rather
than system-wide and does not require system administrator privileges,
which makes it suitable for multi-user HPC clusters ,(ref :bib
'courtes2015:reproducible). To offer the level of flexibility that HPC
users expect, Guix lets users customize packages ,(it [via]) ,(emph
[package transformation options]) on the command line—for instance to
swap two packages in the dependency graph—or through programming
interfaces ,(ref :bib 'courtes2015:reproducible).])
(p [Quite uniquely, Guix supports ,(emph [“time traveling”]): with
,(tt [guix time-machine]), users can run a specific revision of Guix and
use it to deploy packages as they were defined in that revision. The
typical use case is redeploying software that was used to produce
computational results for a scientific publication ,(ref :bib
'(hinsen2020:staged-computation courtes2020:storage
perkel2020:challenge)). The command below deploys Python, NumPy, and
their dependencies as they were defined in a Guix revision from October
2021:])
(prog :class "small" :line #f [
guix time-machine --commit=b0735c79b0d1d341 -- \\
shell python python-numpy
])
(p [Whether you run it today or two years from now, it will deploy
the ,(emph [exact same binaries]), bit-for-bit, down to the C
library.]))
(chapter :title [Package Multi-Versioning]
:number #f
(p [With our packaging hammer, one could envision a solution to
these CPU tuning problems: if we cannot do function multi-versioning,
what about implementing ,(emph [package]) multi-versioning? Guix makes
it easy to define package variants, so we can define package variants
optimized for a specific CPU—compiled with ,(tt [-march=skylake]), for
instance. What we need is to define those variants “on the fly”.])
(p [The recently-introduced ,(tt [--tune]) package transformation
option works along those lines. Users can pass ,(tt [--tune]) to any of
the command-line tools (,(tt [guix install]), ,(tt [guix shell]), etc.)
and that causes “tunable” packages to be optimized for the host CPU.
For example, here is how you would run Eigen’s matrix multiplication
benchmark from the ,(tt [eigen-benchmarks]) package with
micro-architecture tuning:])
(prog :class "small" :line #f [
$ guix shell --tune eigen-benchmarks -- \\
benchBlasGemm 240 240 240
guix shell: tuning for CPU skylake
240 x 240 x 240
cblas: 0.208547 (15.908 GFlops/s)
eigen : 0.0720303 (46.06 GFlops/s)
l1: 32768
l2: 262144
])
(p [,(tt [--tune]) determines the name of the host CPU as
recognized by GCC’s (and Clang’s) ,(tt [-march]) option. Users can
override auto-detection by passing a CPU name—e.g., ,(tt
[--tune=skylake-avx512]). As mentioned earlier, we made the conscious
choice of letting ,(tt [--tune]) affect solely software that packagers
explicitly marked as “tunable”. This ensures Guix does not end up
rebuilding packages that could not possibly benefit from
micro-architecture-specific optimizations, which would be a waste of
resources.])
#;(p [(For the same
reason, we rejected the idea of defining separate system types for the
various x86_64 CPU micro-architectures the way Nix 2.4 did (FIXME
https://discourse.nixos.org/t/nix-2-4-released/15822#other-features-2).)])
#;(p [In the spirit of avoiding needless package rebuilds, ,(tt [--tune])
leverages the “graft” mechanism (XREF
https://guix.gnu.org/manual/en/html_node/Security-Updates.html): package
variants are ,(emph [grafted]) to the dependency graph, such that dependents of
a tuned package do not need to be rebuilt. To illustrate that, consider
the figure below:])
;;![Dependency graph of OpenCV, where the tuned variant of VTK is grafted.](/static/images/blog/cpu-tuning-graft.png)
#;(p [OpenCV depends on VTK, which depends on Eigen, as shown by the
dotted arrows. VTK is marked as tunable so it can benefit from SIMD
optimizations in Eigen. When ,(tt [--tune]) is passed, the optimized variant
of VTK built with ,(tt [-march=skylake]) is generated and grafted onto the
dependency graph, such that OpenCV itself does not need to be recompiled
and instead is relinked against the optimized VTK variant.])
(p [This implementation of package multi-versioning does not
sacrifice reproducibility. When ,(tt [--tune]) is used, from Guix’s
viewpoint, it is just an alternate, but well-defined dependency graph
that gets built. Guix records package transformation options that were
used so it can “replay” them. For example, one can export a ,(emph
[manifest]) representing packages that have been deployed:])
(prog :class "small" :line #f [
$ guix shell eigen-benchmarks --tune
guix shell: tuning for CPU skylake
\[env\]$ guix package --export-manifest \\
-p $GUIX_ENVIRONMENT
(use-modules (guix transformations))
(define transform1
(options->transformation
'((tune . "skylake"))))
(packages->manifest
(list (transform1
(specification->package
"eigen-benchmarks"))))
])
(p [The manifest above is a code snippet that can be passed to
,(tt [guix shell]) or ,(tt [guix package]) to redeploy the package with
the same tuning parameters. Like other transformation options, ,(tt
[--tune]) is accepted by all the commands; for example, here is how you
would build a Docker image tuned for a particular CPU:])
(prog :class "small" :line #f [
guix pack -f docker -S /bin=bin \\
eigen-benchmarks --tune=skylake
])
#;(p [This comes in handy if you want to prepare an image to run on
another cluster, where you know you can rely on a given CPU extension.])
#;(p [The Guix build farm is set up to build a few optimized package
variants. That way, users of ,(tt [--tune]) are likely to get pre-built
binaries even for the optimized variants, making deployment just as fast
as with non-tuned packages. To achieve this, ,(tt [--tune]) skips
running test suites when building packages: we cannot be sure that build
machines implement the CPU micro-architecture at hand.]))
(chapter :title [Conclusion and Outlook]
:number #f
(p [We implemented what we call “package multi-versioning” for
C/C++ software that lacks function multi-versioning and run-time
dispatch, a notable example of which is optimized C++ header-only
libraries. It is another way to ensure that users do not have to trade
reproducibility for performance.])
;; refs:
;; (FIXME https://docs.julialang.org/en/v1/devdocs/sysimg/)
;; (FIXME https://docs.rs/multiversion/0.6.1/multiversion/)
(p [The scientific programming landscape has been evolving over
the last few years. It is encouraging to see that Julia offers function
multi-versioning for its “system image”, and that, similarly, Rust
supports it with annotations similar to GCC’s ,(tt [target_clones]).
Hopefully these new development environments will support performance
portability well enough that users and packagers will not need to worry
about it.])
(p [But first and foremost, it is up to us, research software
engineers and scientists, to dispel the myth that performance is a valid
excuse for non-reproducible computational workflows.]))
(chapter :title "References"
:number #f
(flush :side 'left
(the-bibliography
:sort bib-sort/first-author-last-name)))
(!latex
"\n\\begin{IEEEbiography}{Ludovic Courtès}\n$1\n\\end{IEEEbiography}\n"
[is a research software engineer at Inria, France. He has been
contributing to the development of GNU Guix since its inception in 2012
and works on its use in support of reproducible research workflows. He
holds a PhD in computer science from LAAS-CNRS. You can reach him at
,(it [ludovic.courtes@inria.fr]).]))
;; Local Variables:
;; ispell-local-dictionary: "american"
;; compile-command: "guix shell -m manifest.scm -- make -j5"
;; eval: (setq indent-tabs-mode nil)
;; End:

96
doc/cise-2022/hpc.sbib Normal file
View File

@ -0,0 +1,96 @@
(misc courtes2019:openmpi
(title "Optimized and Portable Open MPI Packaging")
(author "Ludovic Courtès")
(year "2019")
(month "December")
(url "https://hpc.guix.info/blog/2019/12/optimized-and-portable-open-mpi-packaging/"))
(misc courtes2018:prebuilt
(title "Pre-Built Binaries vs. Performance")
(author "Ludovic Courtès")
(year "2018")
(month "January")
(url "https://hpc.guix.info/blog/2018/01/pre-built-binaries-vs-performance/"))
(misc guennebaud2022:eigen
(title "Eigen C++ linear algebra library")
(author "Gaël Guennebaud, Benoît Jacob, et al.")
(year "2022")
(month "March")
(url "https://eigen.tuxfamily.org"))
(inproceedings cassagne2018:mipp
(author "Adrien Cassagne, Olivier Aumage, Denis Barthou, Camille Leroux, Christophe Jégo")
(title "MIPP: A Portable C++ SIMD Wrapper and Its Use for Error Correction Coding in 5G Standard")
(year "2018")
(isbn "9781450356466")
(publisher "Association for Computing Machinery")
(address "New York, NY, USA")
(url "https://doi.org/10.1145/3178433.3178435")
(doi "10.1145/3178433.3178435")
(booktitle "Proceedings of the 2018 4th Workshop on Programming Models for SIMD/Vector Processing")
(articleno "2")
(numpages "8")
(keywords "wrapper, channel code, C++, SSE, SIMD, AVX-512, NEON, AVX")
(location "Vienna, Austria")
(series "WPMVP'18"))
(misc larsen2021:eigen-fmv
(title "Linking modules compiled for different SIMD instruction sets")
(author "Rasmus Munk Larsen, Benoît Jacob, Antonio Sánchez")
(url "https://gitlab.com/libeigen/eigen/-/issues/2344")
(year "2021")
(month "October"))
(inproceedings gamblin2015:spack
(author "Todd Gamblin, Matthew LeGendre, Michael R. Collette, Gregory L. Lee, Adam Moody, Bronis R. de Supinski, Scott Futral")
(title "The Spack Package Manager: Bringing Order to HPC Software Chaos")
(year "2015")
(isbn "9781450337236")
(publisher "Association for Computing Machinery")
(address "New York, NY, USA")
(url "https://doi.org/10.1145/2807591.2807623")
(doi "10.1145/2807591.2807623")
(booktitle "Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis")
(articleno "40")
(numpages "12")
(location "Austin, Texas")
(series "SC '15"))
(inproceedings courtes2015:reproducible
(url "https://hal.inria.fr/hal-01161771/en")
(series "Lecture Notes in Computer Science")
(month "August")
(pages "579--591")
(year "2015")
(booktitle "Euro-Par 2015: Parallel Processing Workshops")
(author "Ludovic Courtès, Ricardo Wurmus")
(title "Reproducible and User-Controlled Software Environments in HPC with Guix"))
(article courtes2020:storage
(note "https://doi.org/10.5281/zenodo.3886739")
(doi "10.5281/zenodo.3886739")
(month "June")
(year "2020")
(number "1")
(volume "6")
(journal "ReScience C")
(author "Ludovic Courtès")
(title "[Re] Storage Tradeoffs in a Collaborative Backup Service for Mobile Devices"))
(article perkel2020:challenge
(month "August")
(year "2020")
(note "https://www.nature.com/articles/d41586-020-02462-7")
(journal "Nature")
(author "Jeffrey M. Perkel")
(title "Challenge to Scientists: Does Your Ten-Year-Old Code Still Run?"))
#|
(defun skr-from-bibtex ()
"Vaguely convert the BibTeX snippets after POINT to SBibTeX."
(interactive)
(while (re-search-forward "\\([a-z_-]+\\) *= *[{\"]\\([^}\"]+\\)[}\"] *, *$" nil nil)
(replace-match "(\\1 \"\\2\")")))
|#

View File

@ -0,0 +1,15 @@
% Timeline of x86_64 SIMD extensions, drawn as a 3x3 matrix of boxes:
% SSE2 -> SSE3 -> SSSE3 down the left column, AVX -> AVX2 -> AVX-512 up
% the right column, with the "x86_64" label in the middle cell.
% The colors guixblue2 and guixorange1 are not defined here; they are
% expected to come from the preamble of the including document.
% NOTE(review): \bf and \sf are obsolete font commands; depending on the
% document class \sf may cancel \bf -- check the rendered weight.
\begin{tikzpicture}[
box/.style = { fill=guixblue2, text=white, inner sep=3mm, rounded corners, font=\bf\sf }
]
\matrix[row sep=6mm, column sep=6mm] {
\node(sse2) [box, draw=guixorange1, thick] {SSE2 (ca. 2003)}; & & \node(avx512) [box] {AVX-512 (2013)}; \\
\node(sse3) [box] {SSE3}; & \node {\large\textbf{x86\_64}}; & \node(avx2) [box] {AVX2}; \\
\node(ssse3) [box] {SSSE3}; & & \node(avx) [box] {AVX}; \\
};
\path[very thick, draw=guixorange1] (sse2) edge [->] (sse3);
\path[very thick, draw=guixorange1] (sse3) edge [->] (ssse3);
\path[very thick, draw=guixorange1] (ssse3) edge [->] (avx);
\path[very thick, draw=guixorange1] (avx) edge [->] (avx2);
\path[very thick, draw=guixorange1] (avx2) edge [->] (avx512);
\end{tikzpicture}

View File

@ -0,0 +1,16 @@
;; Guix manifest listing the packages used to typeset the article:
;; 'rubber' plus a set of TeX Live packages.
;; NOTE(review): the makefile in this directory also invokes 'skribilo'
;; and has a rule that runs 'dot'; neither tool is listed here --
;; confirm they are meant to come from the surrounding environment.
(specifications->manifest
'("rubber"
"texlive-base"
"texlive-latex-wrapfig"
"texlive-microtype"
"texlive-latex-listings" "texlive-hyperref"
;; PGF/TikZ
"texlive-latex-pgf"
;; Additional fonts.
"texlive-cm-super" "texlive-amsfonts"
"texlive-inconsolata" "texlive-latex-xkeyval" "texlive-latex-upquote"
"texlive-times" "texlive-helvetic" "texlive-courier"))

View File

@ -1,5 +1,5 @@
(article lamb2021:reproducible
(author "Chris Lamb and Stefano Zacchiroli")
(author "Chris Lamb, Stefano Zacchiroli")
(title "Reproducible Builds: Increasing the Integrity of Software Supply Chains")
(publisher "IEEE Computer Society")
(year "2021")