diff --git a/doc/cise-2022/GNUmakefile b/doc/cise-2022/GNUmakefile new file mode 100644 index 0000000..f305659 --- /dev/null +++ b/doc/cise-2022/GNUmakefile @@ -0,0 +1,21 @@ +SKRIBILO := skribilo +PDFLATEX := pdflatex +RUBBER := rubber +DOT := dot +DOT-OPTS := -Gratio=1.2 -Gwidth=15cm + +.DEFAULT_GOAL := cpu-tuning.pdf +INCLUDED-FILES := \ + images/cpu-simd-extensions.tex \ + hpc.sbib + +cpu-tuning.tex: $(INCLUDED-FILES) # local files read while cpu-tuning.skb is processed +%.pdf: %.tex + $(RUBBER) --pdf -I $$PWD "$<" + +%.tex: %.skb + $(SKRIBILO) -t latex -o "$@" "$<" + +%.pdf: %.dot + $(DOT) -Tpdf $(DOT-OPTS) < "$<" > "$@.tmp" + mv "$@.tmp" "$@" diff --git a/doc/cise-2022/cpu-tuning.skb b/doc/cise-2022/cpu-tuning.skb new file mode 100644 index 0000000..ff7220c --- /dev/null +++ b/doc/cise-2022/cpu-tuning.skb @@ -0,0 +1,601 @@ +(use-modules (skribilo engine) + (skribilo engine latex) + (skribilo ast) + (skribilo writer) + (skribilo output) + (skribilo utils strings) + (skribilo lib) + (skribilo evaluator) + (skribilo biblio author) + (skribilo source) + (skribilo source lisp) + (skribilo source parameters) + (ice-9 match) + (rnrs io ports)) + +(define (---) ; emdash + (resolve (lambda (n e env) + (if (engine-format? "html" e) + (! "—") + (! "---"))))) + +(define (--) ; endash + (resolve (lambda (n e env) + (if (engine-format? "html" e) + (! "–") + (! "--"))))) + +(define (dash-dash) ;two literal hyphens; emitted as "{-}{-}" for the LaTeX engine + (resolve (lambda (n e env) + (if (engine-format? "latex" e) + (! "{-}{-}") + "--")))) + +(define (url url) ;URL shown in teletype and used as its own link target + (ref :text (tt url) :url url)) + +(define (=>) + (symbol "=>")) + +;; XXX: Terrible hack to turn hyphens into hyphenation points in 'tt'. 
+(define latex-tt-encoding + `((#\- "-\\-") + (#\h "h\\-") ;“authorizations”, “authenticate” + ,@(@@ (skribilo engine latex) latex-tt-encoding))) + +(markup-writer 'tt (find-engine 'latex) + :before "{\\texttt{" + :action (lambda (n e) + (let ((ne (make-engine + (gensym "latex") + :delegate e + :filter (make-string-replace latex-tt-encoding) ;use the extended table defined above + :custom (engine-customs e) + :symbol-table (engine-symbol-table e)))) + (output (markup-body n) ne))) + :after "}}") + +;; For pdflatex. +(engine-custom-set! (find-engine 'latex) 'image-format '("pdf")) + +;; Avoid "option clash" with acmart.  NOTE(review): the class set below is IEEEcsmag, not acmart -- confirm this is still needed. +(engine-custom-set! (find-engine 'latex) 'hyperref #f) + +(engine-custom-set! (find-engine 'latex) 'usepackage + (let ((u (engine-custom (find-engine 'latex) + 'usepackage))) + ;; See + ;; and . + (string-append u "\n" + "\\usepackage{inconsolata}\n" + "\\usepackage{tikz}\n" + "\\usetikzlibrary{arrows,shapes,shadows}\n" + "\\definecolor{guixorange1}{RGB}{243,154,38} % guixorange P\n" + "\\definecolor{guixblue2}{RGB}{10,50,80} % guixblue S\n" + "\\definecolor{guixred2}{RGB}{230,68,57} % red S\n" + "\\definecolor{guixdarkgrey}{RGB}{46,47,55} % guixdarkgrey S\n" + + ;; Trick so that ‘…’ is properly + ;; typeset inside teletype text. + "\\DeclareUnicodeCharacter{2026}{\\textrm{\\ldots}}\n" + + ;; Improve hyphenation. + "\\hyphenation{Open-PGP}\n"))) + +(let ((latex (find-engine 'latex))) + (engine-custom-set! latex 'documentclass + "\\documentclass{IEEEcsmag}") + (engine-custom-set! latex 'maketitle #f) ;\maketitle is emitted by hand in the document body + + (markup-writer '&latex-author latex ;one \author/\affil pair per author + :action (lambda (n e) + (let ((body (markup-body n))) + (for-each (lambda (a) + (display "\\author{{") + (output (markup-option a :name) e) + (display "}}\n\\affil{\n") + (output (markup-option a :affiliation) e) + (display "}\n\n")) + (if (pair? 
body) body (list body)))))) + + (markup-writer 'image latex + :options '(:file :url :width :height :zoom) ;only :width and :file are read below + :action (lambda (n e) + (format #t "\n\\includegraphics[width=~a\\textwidth]{~a}\n" + (or (markup-option n :width) 0.5) ;default: half of \textwidth + (markup-option n :file)))) + + (markup-writer 'prog latex + :class "small" + :options '(:line :mark) + :before "\n\n\\vspace{3mm}\n\\begin{footnotesize}\n" + :action (lambda (n e) + ;; Delegate actual work to the "real" 'prog'. + (output (prog :line (markup-option n :line) + :mark (markup-option n :mark) + (node-body n)) + e)) + :after "\n\\end{footnotesize}\n") + + (markup-writer 'figure latex + :options '(:legend :number :multicolumns) + :action (lambda (n e) + (let ((ident (markup-ident n)) + (legend (markup-option n :legend)) + (mc (markup-option n :multicolumns))) ;NOTE(review): 'mc' is bound but never used + (display "\\begin{figure}[ht]\n\\begin{scriptsize}\n") + (output (markup-body n) e) + (display "\n\\end{scriptsize}\n") + (format #t "\\caption{\\label{~a}" + (string-canonicalize ident)) + (output legend e) + (display "}\\end{figure}\n")))) + + (markup-writer '&bib-entry-author ;NOTE(review): no engine argument, unlike the writers above + :action (lambda (n e) + (let ((names (markup-body n))) + (evaluate-document + (if (string? names) + (abbreviate-first-names + names + abbreviate-author-first-names) + names) + e))))) + +(define (word-count) + "Emit the word count." + (define (body-words body) + (match body + ((? string? str) + (length (string-tokenize str))) + ((? ast?) 0) ;don’t double-count + ((things ...) + (apply + (map body-words things))))) + + (color :fg "red" (bold + [,(resolve (lambda (n env engine) ;NOTE(review): other 'resolve' callbacks in this file name these (n e env); only 'n' is used + (ast-fold (lambda (n r) + (cond ((is-markup? n 'figure) + (+ r 250)) ;each figure counts as 250 words + ((and (is-markup? n 'chapter) + (equal? (markup-option n :title) + "References")) + (+ r 250)) ;flat 250 words for the reference list + ((container? n) + r) + ((markup? n) + (let ((body (markup-body n))) + (+ r (body-words body)))) + (else r))) + (+ 250 250) ;abstract + biography + (ast-document n)))) words.]))) + + +(define (abstract . 
body) + (!latex "\n\\begin{abstract}\n$1\n\\end{abstract}\n\n" body)) + +(bibliography "../els-2013/guix.sbib") +(bibliography "../reppar-2015/reppar.sbib") +(bibliography "../programming-2022/security.sbib") +(bibliography "hpc.sbib") + + +(document :title [Reproducibility and Performance: Why Choose?] + ;;[Conciliating Performance and Reproducibility] + :author (list (author :name "Ludovic Courtès" + :affiliation "Inria" + :address "Bordeaux, France")) + + (abstract [Research processes often rely on high-performance +computing (HPC), but HPC is often seen as antithetical to +“reproducibility”: one would have to choose between software that +achieves high performance, and software that can be deployed in a +reproducible fashion. However, by giving up on reproducibility we would +give up on verifiability, a foundation of the scientific process. How +can we conciliate performance and reproducibility? This article looks +at two performance-critical aspects in HPC: message passing (MPI) and +CPU micro-architecture tuning. Engineering work that has gone into +performance portability has already proved fruitful, but some areas +remain unaddressed when it comes to CPU tuning. We propose package +multi-versioning, a technique developed for GNU Guix, a tool for +reproducible software deployment, and show that it allows us to +implement CPU tuning without compromising on reproducibility and +provenance tracking.]) + + (!latex "\n\\maketitle\n") + (!latex "\n\\chapterinitial{Introduction.}\n") + + ;; (word-count) + + (p [It should come as no surprise that the execution speed of programs is a +primary concern in high-performance computing (HPC). 
Many HPC +practitioners would tell you that among their top concerns are the +performance of high-speed networks used by the Message Passing Interface +(MPI) and the use of the latest vectorization extensions of modern CPUs.]) + + (p [This article focuses on the latter: tuning code for specific CPU +micro-architectures, to reap the benefits of modern CPUs. This question +is particularly acute in the context of GNU Guix, a software deployment +tool with strong support for ,(emph [reproducible deployment]). We like +to present Guix as a key element of the reproducible research toolbox: +as more research output is produced by software, the ability to ,(emph +[verify and validate]) research results depends on the ability to ,(emph +[re-deploy and re-run]) the software. We present a recently-introduced +CPU-tuning option for Guix, the design choices we made, and how this +affects reproducibility.]) + + (p [But let us first consider this central question in the HPC and +scientific community: can “reproducibility” be achieved ,(emph +[without]) sacrificing performance? Our answer is a resounding “yes”, +but that deserves clarification.]) + + (chapter :title [Reproducibility & High Performance] + :number #f + + (p [The author remembers advice heard at the beginning of their +career in HPC—advice still given today—: that to get optimal MPI +performance, you would have to use the vendor-provided MPI library; that +to get your code to perform well on this new cluster, you would have to +recompile the complete software stack locally; that using generic, +pre-built binaries from a GNU/Linux distribution will not give you good +performance.]) + + (p [From a software engineering viewpoint, this looks like a sad +situation and an inefficient approach, dismissing the benefits of +automated software deployment as pioneered by Debian, Red Hat, and +others in the 1990s or, more recently, as popularized with container +images. 
It also means doing away with reproducibility, where +“reproducibility” is to be understood in two different ways: first as +the ability to re-deploy the same software stack on another machine or +at a different point in time, and second as the ability to ,(emph [verify]) that +binaries being run match the source code—the latter is what reproducible +builds are concerned with ,(ref :bib 'lamb2021:reproducible).]) + + (p [But does it really have to be this way? Engineering efforts to +support ,(emph [performance portability]) suggest otherwise. A mature +MPI implementation like Open MPI, today, does achieve performance +portability: it takes advantage of high-speed networking hardware by +determining, at run-time, which drivers to use to obtain optimal +performance for the network at hand—no recompilation is needed ,(ref :bib +'courtes2019:openmpi).]) + + (p [Likewise, generic, pre-built binaries can and indeed often do +take advantage of modern CPUs by selecting at run-time the most +efficient implementation of performance-sensitive routines for the host +CPU ,(ref :bib 'courtes2018:prebuilt). There are cases, though, where +this is ,(emph [not]) the case; these are those we will focus on in the +remainder of this article.])) + + (chapter :title [The Jungle of SIMD Extensions] + :number #f + + (p [While major CPU architectures such as x86_64, AArch64, and +POWER9 were defined years ago, CPU vendors regularly extend them. +Extensions that matter most in HPC are vector extensions: single +instruction/multiple data (SIMD) instructions and registers. In this +area, a ,(emph [lot]) has happened on x86_64 CPUs since the baseline +instruction set architecture (ISA) was defined. As shown in ,(numref +:text [Figure] :ident "fig-simd-extensions"), Intel and AMD have been +tacking ever more powerful SIMD extensions to their CPUs over the years, +from SSE3 to AVX-512, leading to a wealth of CPU “micro-architectures”. 
+This gives a high-level view, but just looking at generations of Intel +processors by their code name—from “Nehalem” to “Skylake” ,(it [via]) +“Ivy Bridge”—shows an already more complicated story.]) + + (figure :legend [Timeline of x86_64 SIMD extensions] + :ident "fig-simd-extensions" + :multicolumns #t + (!latex (call-with-input-file "images/cpu-simd-extensions.tex" + get-string-all))) + + (p [Linear algebra routines that scientific software relies on +greatly benefit from SIMD extensions. For example, on a modest Intel +Core i7 processor (of the Skylake generation), the AVX2-optimized +version of the dense matrix multiplication routines of Eigen ,(ref :bib +'guennebaud2022:eigen), built with GCC 10.3, peaks at about 40 Gflops/s, +compared to 11 Gflops/s for its baseline x86_64 version—nearly four times +faster!])) + + (chapter :title [Portable Performance Through Function Multi-Versioning] + :number #f + + (p [How to create binaries that are portable, yet are able to get +the most out of the CPU on which they are executed? This has been an +important question for distributors of binaries. Distributions such as +Debian and CentOS provide the convenience of fast automated deployment, +thanks to pre-built binaries; asking users to either recompile part of +their software stack or give up on performance is not a reasonable +alternative.]) + + (p [To address this and achieve performance portability, +developers have largely adopted ,(emph [function multi-versioning]) +(FMV): the implementation provides multiple versions of “hot” routines, +one for each relevant CPU micro-architecture, and picks the best one for +the host CPU at run time. 
Many pieces of performance-critical software +already use this technique: the C standard library (libc) contains +multiple versions of its string handling and math routines, the GMP +library for multi-precision arithmetic uses FMV, and so do software +packages ranging from cryptography libraries (Libgcrypt, Nettle) to +linear algebra (OpenBLAS, FFTW).]) + + (p [To make it easier for developers to adopt FMV, the GNU +compilation tool chain (GCC, the Binary Utilities, and the C Library), +which is widely used in HPC, provides helpers at different levels. +Developers can annotate relevant functions with the ,(tt [target_clones]) +attribute to instruct the compiler to generate optimized versions of the +function for each selected architecture. GCC not only generates these +versions, but also generates code to choose the right function version +for the host CPU at load time, with support from the dynamic linker, +,(tt [ld.so]). That relieves developers from the need to implement +their own ad-hoc machinery. From that perspective, it would seem that +performance portability, ,(it [via]) FMV, is a solved problem.]) + + #;(stuff on auto-fmv commented out! + + (p [To make the case for FMV, we wanted to see what it would take us to +actually add FMV support to code that would benefit from it. 
In the +spirit of the Clear Linux automatic FMV patch +generator (https://github.com/clearlinux/make-fmv-patch), we wrote an +automatic FMV tool for +Guix (https://gitlab.inria.fr/guix-hpc/function-multi-versioning): you +would give it a package name, and it would:]) + + (itemize + (item [Build the package with the ,(tt [-fopt-info-vec]) compiler flag to gather + information about vectorization opportunities and their source code + location.]) + + (item [Generate a patch that, for each C function with vectorization + opportunities, adds the ,(tt [target_clone]) + attribute to generate a couple of vectorized versions—generic, +AVX2, and + AVX-512.]) + + (item [Build the package with this FMV patch.])) + + (p [The tool can successfully FMV-patch a variety of packages +written in C, such as the GNU Scientific Library (FIXME +https://www.gnu.org/software/gsl), which contains plain sequential +implementations of a variety of math routines. It was an exciting +engineering experiment… but we found it to be all too often +inapplicable, for two reasons: performance-critical software already +does FMV, or it is not written in C.])) + + (p [There is at least one common pattern though where FMV is not +applicable, or at least is not applied: C++ header-only libraries. +These are libraries that provide generic template code in header files; +that code is specialized ,(emph [at build time]) in software that uses +them. There is no shortage of C++ header-only math libraries providing +efficient, optimized SIMD versions of their routines: Eigen, MIPP, xsimd +and xtensor, SIMD Everywhere (SIMDe), Highway, and many more. All +these, except Highway, have in common that they do ,(emph [not]) support +FMV. Since they “just” provide headers, it is up to ,(emph [each]) +package using them to figure out what to do in terms of performance +portability.]) + + (p [In practice though, software using these C++ header-only +libraries rarely makes provisions for performance portability. 
Thus, +when compiling those packages for the baseline ISA, one misses out on +all the vectorized implementations that libraries like Eigen provide. +This is a known issue in search of a solution ,(ref :bib +'larsen2021:eigen-fmv). It can have a very concrete impact on +performance since many scientific packages—the ARPACK-NG library for +solving eigenvalue problems, the Ceres solver for optimization problems, +the FEniCSx platform for solving differential equations, to name a +few—depend on Eigen.]) + + #;(p [Fundamentally, run-time dispatch is at odds with the all-compile-time +approach that header-only C++ template libraries are about. +Furthermore, Eigen, for example, supports fine-grain vectorization; it +may be used to operate on small matrices, as is common in computer +graphics, and in that case inlining matrix operations is key to good +performance—run-time dispatch would have to be done at a higher +level.])) + + (chapter :title [Reproducible Deployment] + :number #f + + (p [Distributions such as Debian and Fedora that provide pre-built +binaries miss out on SIMD optimizations of C++ header-only libraries +like Eigen because they provide binaries targeting the baseline CPU +architecture so that those binaries run on any CPU. The Spack ,(ref +:bib 'gamblin2015:spack) and EasyBuild ,(ref :bib 'geimer2014:easybuild) +package managers address that by ,(emph [rebuilding]) software on the +target computer, which allows them to instruct the compiler to optimize +for the host CPU.]) + + (p [Unfortunately, EasyBuild and Spack both have limited support +for reproducible deployment—they do not, in general, guarantee that you +can redeploy the same software environment on different machines, or at +different points in time. 
This is because they build upon software +provided by the host system—the compiler tool chain, “system” libraries, +etc.—and that foundation differs from one system to another—e.g., CentOS +might provide some version of GCC, and Ubuntu might provide another.]) + + (p [To avoid that, Guix builds software in ,(emph [isolated +environments]), as pioneered by Nix ,(ref :bib '(dolstra2004:nix +courtes2013:functional)), and its package collection is ,(emph +[self-contained])—it does not rely on external software packages. This +is what makes Guix builds reproducible bit-for-bit—or in other words, +,(emph [verifiable]) ,(ref :bib 'lamb2021:reproducible). Given binaries +and provenance data, anyone can independently verify the +binary/source-code correspondence.]) + + (p [Guix provides a command-line interface similar to that of +other package managers: ,(tt [guix install python]), for instance, +installs the Python interpreter. Package management is per-user rather +than system-wide and does not require system administrator privileges, +which makes it suitable for multi-user HPC clusters ,(ref :bib +'courtes2015:reproducible). To offer the level of flexibility that HPC +users expect, Guix lets users customize packages ,(it [via]) ,(emph +[package transformation options]) on the command line—for instance to +swap two packages in the dependency graph—or through programming +interfaces ,(ref :bib 'courtes2015:reproducible).]) + + (p [Quite uniquely, Guix supports ,(emph [“time traveling”]): with +,(tt [guix time-machine]), users can run a specific revision of Guix and +use it to deploy packages as they were defined in that revision. The +typical use case is redeploying software that was used to produce +computational results for a scientific publication ,(ref :bib +'(hinsen2020:staged-computation courtes2020:storage +perkel2020:challenge)). 
The command below deploys Python, NumPy, and +their dependencies as they were defined in a Guix revision from October +2021:]) + + (prog :class "small" :line #f [ +guix time-machine --commit=b0735c79b0d1d341 -- \\ + shell python python-numpy +]) + + (p [Whether you run it today or two years from now, it will deploy +the ,(emph [exact same binaries]), bit-for-bit, down to the C +library.])) + + (chapter :title [Package Multi-Versioning] + :number #f + + (p [With our packaging hammer, one could envision a solution to +these CPU tuning problems: if we cannot do function multi-versioning, +what about implementing ,(emph [package]) multi-versioning? Guix makes +it easy to define package variants, so we can define package variants +optimized for a specific CPU—compiled with ,(tt [-march=skylake]), for +instance. What we need is to define those variants “on the fly”.]) + + (p [The recently-introduced ,(tt [--tune]) package transformation +option works along those lines. Users can pass ,(tt [--tune]) to any of +the command-line tools (,(tt [guix install]), ,(tt [guix shell]), etc.) +and that causes “tunable” packages to be optimized for the host CPU. +For example, here is how you would run Eigen’s matrix multiplication +benchmark from the ,(tt [eigen-benchmarks]) package with +micro-architecture tuning:]) + + (prog :class "small" :line #f [ +$ guix shell --tune eigen-benchmarks -- \\ + benchBlasGemm 240 240 240 +guix shell: tuning for CPU skylake +240 x 240 x 240 +cblas: 0.208547 (15.908 GFlops/s) +eigen : 0.0720303 (46.06 GFlops/s) +l1: 32768 +l2: 262144 +]) + + (p [,(tt [--tune]) determines the name of the host CPU as +recognized by GCC’s (and Clang’s) ,(tt [-march]) option. Users can +override auto-detection by passing a CPU name—e.g., ,(tt +[--tune=skylake-avx512]). As mentioned earlier, we made the conscious +choice of letting ,(tt [--tune]) affect solely software that packagers +explicitly marked as “tunable”. 
This ensures Guix does not end up +rebuilding packages that could not possibly benefit from +micro-architecture-specific optimizations, which would be a waste of +resources.]) + + #;(p [(For the same +reason, we rejected the idea of defining separate system types for the +various x86_64 CPU micro-architectures the way Nix 2.4 did (FIXME +https://discourse.nixos.org/t/nix-2-4-released/15822#other-features-2).)]) + + #;(p [In the spirit of avoiding needless package rebuilds, ,(tt [--tune]) +leverages the “graft” mechanism (XREF +https://guix.gnu.org/manual/en/html_node/Security-Updates.html): package +variants are ,(emph [grafted]) to the dependency graph, such that dependents of +a tuned package do not need to be rebuilt. To illustrate that, consider +the figure below:]) + + ;;![Dependency graph of OpenCV, where the tuned variant of VTK is grafted.](/static/images/blog/cpu-tuning-graft.png) + + #;(p [OpenCV depends on VTK, which depends on Eigen, as shown by the +dotted arrows. VTK is marked as tunable so it can benefit from SIMD +optimizations in Eigen. When ,(tt [--tune]) is passed, the optimized variant +of VTK built with ,(tt [-march=skylake]) is generated and grafted onto the +dependency graph, such that OpenCV itself does not need to be recompiled +and instead is relinked against the optimized VTK variant.]) + + (p [This implementation of package multi-versioning does not +sacrifice reproducibility. When ,(tt [--tune]) is used, from Guix’s +viewpoint, it is just an alternate, but well-defined dependency graph +that gets built. Guix records package transformation options that were +used so it can “replay” them. 
For example, one can export a ,(emph +[manifest]) representing packages that have been deployed:]) + + (prog :class "small" :line #f [ +$ guix shell eigen-benchmarks --tune +guix shell: tuning for CPU skylake +\[env\]$ guix package --export-manifest \\ + -p $GUIX_ENVIRONMENT +(use-modules (guix transformations)) + +(define transform1 + (options->transformation + '((tune . "skylake")))) + +(packages->manifest + (list (transform1 + (specification->package + "eigen-benchmarks")))) +]) + + (p [The manifest above is a code snippet that can be passed to +,(tt [guix shell]) or ,(tt [guix package]) to redeploy the package with +the same tuning parameters. Like other transformation options, ,(tt +[--tune]) is accepted by all the commands; for example, here is how you +would build a Docker image tuned for a particular CPU:]) + + (prog :class "small" :line #f [ +guix pack -f docker -S /bin=bin \\ + eigen-benchmarks --tune=skylake +]) + + #;(p [This comes in handy if you want to prepare an image to run on +another cluster, where you know you can rely on a given CPU extension.]) + + #;(p [The Guix build farm is set up to build a few optimized package +variants. That way, users of ,(tt [--tune]) are likely to get pre-built +binaries even for the optimized variants, making deployment just as fast +as with non-tuned packages. To achieve this, ,(tt [--tune]) skips +running test suites when building packages: we cannot be sure that build +machines implement the CPU micro-architecture at hand.])) + + (chapter :title [Conclusion and Outlook] + :number #f + + (p [We implemented what we call “package multi-versioning” for +C/C++ software that lacks function multi-versioning and run-time +dispatch, a notable example of which is optimized C++ header-only +libraries. 
It is another way to ensure that users do not have to trade +reproducibility for performance.]) + + ;; refs: + ;; (FIXME https://docs.julialang.org/en/v1/devdocs/sysimg/) + ;; (FIXME https://docs.rs/multiversion/0.6.1/multiversion/) + (p [The scientific programming landscape has been evolving over +the last few years. It is encouraging to see that Julia offers function +multi-versioning for its “system image”, and that, similarly, Rust +supports it with annotations similar to GCC’s ,(tt [target_clones]). +Hopefully these new development environments will support performance +portability well enough that users and packagers will not need to worry +about it.]) + + (p [But first and foremost, it is up to us, research software +engineers and scientists, to dispel the myth that performance is a valid +excuse for non-reproducible computational workflows.])) + + (chapter :title "References" + :number #f + (flush :side 'left + (the-bibliography + :sort bib-sort/first-author-last-name))) + + (!latex + "\n\\begin{IEEEbiography}{Ludovic Courtès}\n$1\n\\end{IEEEbiography}\n" + [is a research software engineer at Inria, France. He has been +contributing to the development of GNU Guix since its inception in 2012 +and works on its use in support of reproducible research workflows. He +holds a PhD in computer science from LAAS-CNRS. 
You can reach him at +,(it [ludovic.courtes@inria.fr]).])) + +;; Local Variables: +;; ispell-local-dictionary: "american" +;; compile-command: "guix shell -m manifest.scm -- make -j5" +;; eval: (setq indent-tabs-mode nil) +;; End: diff --git a/doc/cise-2022/hpc.sbib b/doc/cise-2022/hpc.sbib new file mode 100644 index 0000000..8094028 --- /dev/null +++ b/doc/cise-2022/hpc.sbib @@ -0,0 +1,96 @@ +(misc courtes2019:openmpi + (title "Optimized and Portable Open MPI Packaging") + (author "Ludovic Courtès") + (year "2019") + (month "December") + (url "https://hpc.guix.info/blog/2019/12/optimized-and-portable-open-mpi-packaging/")) + +(misc courtes2018:prebuilt + (title "Pre-Built Binaries vs. Performance") + (author "Ludovic Courtès") + (year "2018") + (month "January") + (url "https://hpc.guix.info/blog/2018/01/pre-built-binaries-vs-performance/")) + +(misc guennebaud2022:eigen + (title "Eigen C++ linear algebra library") + (author "Gaël Guennebaud, Benoît Jacob, et al.") + (year "2022") + (month "March") + (url "https://eigen.tuxfamily.org")) + +(inproceedings cassagne2018:mipp + (author "Adrien Cassagne, Olivier Aumage, Denis Barthou, Camille Leroux, Christophe Jégo") + (title "MIPP: A Portable C++ SIMD Wrapper and Its Use for Error Correction Coding in 5G Standard") + (year "2018") + (isbn "9781450356466") + (publisher "Association for Computing Machinery") + (address "New York, NY, USA") + (url "https://doi.org/10.1145/3178433.3178435") + (doi "10.1145/3178433.3178435") + (booktitle "Proceedings of the 2018 4th Workshop on Programming Models for SIMD/Vector Processing") + (articleno "2") + (numpages "8") + (keywords "wrapper, channel code, C++, SSE, SIMD, AVX-512, NEON, AVX") + (location "Vienna, Austria") + (series "WPMVP'18")) + +(misc larsen2021:eigen-fmv + (title "Linking modules compiled for different SIMD instruction sets") + (author "Rasmus Munk Larsen, Benoît Jacob, Antonio Sánchez") + (url "https://gitlab.com/libeigen/eigen/-/issues/2344") + (year "2021") + 
(month "October")) + +(inproceedings gamblin2015:spack + (author "Todd Gamblin, Matthew LeGendre, Michael R. Collette, Gregory L. Lee, Adam Moody, Bronis R. de Supinski, Scott Futral") + (title "The Spack Package Manager: Bringing Order to HPC Software Chaos") + (year "2015") + (isbn "9781450337236") + (publisher "Association for Computing Machinery") + (address "New York, NY, USA") + (url "https://doi.org/10.1145/2807591.2807623") + (doi "10.1145/2807591.2807623") + (booktitle "Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis") + (articleno "40") + (numpages "12") + (location "Austin, Texas") + (series "SC '15")) + +(inproceedings courtes2015:reproducible + (url "https://hal.inria.fr/hal-01161771/en") + (series "Lecture Notes in Computer Science") + (month "August") + (pages "579--591") + (year "2015") + (booktitle "Euro-Par 2015: Parallel Processing Workshops") + (author "Ludovic Courtès, Ricardo Wurmus") + (title "Reproducible and User-Controlled Software Environments in HPC with Guix")) + + +(article courtes2020:storage + (note "https://doi.org/10.5281/zenodo.3886739") + (doi "10.5281/zenodo.3886739") + (month "June") + (year "2020") + (number "1") + (volume "6") + (journal "ReScience C") + (author "Ludovic Courtès") + (title "[Re] Storage Tradeoffs in a Collaborative Backup Service for Mobile Devices")) + +(article perkel2020:challenge + (month "August") + (year "2020") + (note "https://www.nature.com/articles/d41586-020-02462-7") + (journal "Nature") + (author "Jeffrey M. Perkel") + (title "Challenge to Scientists: Does Your Ten-Year-Old Code Still Run?")) + +#| +(defun skr-from-bibtex () + "Vaguely convert the BibTeX snippets after POINT to SBibTeX." 
+ (interactive) + (while (re-search-forward "\\([a-z_-]+\\) *= *[{\"]\\([^}\"]+\\)[}\"] *, *$" nil nil) + (replace-match "(\\1 \"\\2\")"))) +|# diff --git a/doc/cise-2022/images/cpu-simd-extensions.tex b/doc/cise-2022/images/cpu-simd-extensions.tex new file mode 100644 index 0000000..e9474b1 --- /dev/null +++ b/doc/cise-2022/images/cpu-simd-extensions.tex @@ -0,0 +1,15 @@ + \begin{tikzpicture}[ + box/.style = { fill=guixblue2, text=white, inner sep=3mm, rounded corners, font=\bf\sf } + ] + \matrix[row sep=6mm, column sep=6mm] { + \node(sse2) [box, draw=guixorange1, thick] {SSE2 (ca. 2003)}; & & \node(avx512) [box] {AVX-512 (2013)}; \\ + \node(sse3) [box] {SSE3}; & \node {\large\textbf{x86\_64}}; & \node(avx2) [box] {AVX2}; \\ + \node(ssse3) [box] {SSSE3}; & & \node(avx) [box] {AVX}; \\ + }; + + \path[very thick, draw=guixorange1] (sse2) edge [->] (sse3); + \path[very thick, draw=guixorange1] (sse3) edge [->] (ssse3); + \path[very thick, draw=guixorange1] (ssse3) edge [->] (avx); + \path[very thick, draw=guixorange1] (avx) edge [->] (avx2); + \path[very thick, draw=guixorange1] (avx2) edge [->] (avx512); + \end{tikzpicture} diff --git a/doc/cise-2022/manifest.scm b/doc/cise-2022/manifest.scm new file mode 100644 index 0000000..b89f6c9 --- /dev/null +++ b/doc/cise-2022/manifest.scm @@ -0,0 +1,16 @@ +(specifications->manifest + '("rubber" + + "texlive-base" + "texlive-latex-wrapfig" + + "texlive-microtype" + "texlive-latex-listings" "texlive-hyperref" + + ;; PGF/TikZ + "texlive-latex-pgf" + + ;; Additional fonts. 
+ "texlive-cm-super" "texlive-amsfonts" + "texlive-inconsolata" "texlive-latex-xkeyval" "texlive-latex-upquote" + "texlive-times" "texlive-helvetic" "texlive-courier")) diff --git a/doc/programming-2022/security.sbib b/doc/programming-2022/security.sbib index c5a18b6..1de465b 100644 --- a/doc/programming-2022/security.sbib +++ b/doc/programming-2022/security.sbib @@ -1,5 +1,5 @@ (article lamb2021:reproducible - (author "Chris Lamb and Stefano Zacchiroli") + (author "Chris Lamb, Stefano Zacchiroli") (title "Reproducible Builds: Increasing the Integrity of Software Supply Chains") (publisher "IEEE Computer Society") (year "2021")