From f19c9291cd1e3dd2e5b5ff8504e8e13159cf96b7 Mon Sep 17 00:00:00 2001 From: Badri Sunderarajan Date: Wed, 30 Oct 2024 17:57:55 +0530 Subject: [PATCH] Add Steven Sprang's commonmark+scm reader Thanks, Steven, for actually writing this utility! --- haunt.scm | 5 +- nybble/reader/commonmark+scm.scm | 86 ++++++++++++++++++++++++++++++++ nybble/sxml.scm | 61 ++++++++++++++++++++++ nybble/typography.scm | 86 ++++++++++++++++++++++++++++++++ 4 files changed, 236 insertions(+), 2 deletions(-) create mode 100644 nybble/reader/commonmark+scm.scm create mode 100644 nybble/sxml.scm create mode 100644 nybble/typography.scm diff --git a/haunt.scm b/haunt.scm index 59804ad..d55ff65 100644 --- a/haunt.scm +++ b/haunt.scm @@ -6,6 +6,7 @@ (hippo builder atom-extless) (haunt builder assets) (haunt reader commonmark) + (nybble reader commonmark+scm) (haunt site) (haunt publisher rsync)) @@ -219,7 +220,7 @@ the given additional patterns" (extend-file-filter filter '("\\.(jpg|png)$"))) (define (exclude-posts filter) - (extend-file-filter filter '("\\.(md|html?|sxml)$"))) + (extend-file-filter filter '("\\.(scmd|md|html?|sxml)$"))) (define* (inline-images site posts) "Recursively copy all images that are inside the site's @@ -249,7 +250,7 @@ site filter has to filter out images too" #:posts-directory "posts" #:file-filter (exclude-images default-file-filter) #:make-slug post-slug-category - #:readers (list commonmark-reader) + #:readers (list commonmark-reader commonmark+scm-reader) #:builders (list (blog #:prefix "" #:theme hippo-haunt-theme #:collections diff --git a/nybble/reader/commonmark+scm.scm b/nybble/reader/commonmark+scm.scm new file mode 100644 index 0000000..a8ab7de --- /dev/null +++ b/nybble/reader/commonmark+scm.scm @@ -0,0 +1,86 @@ +;;; Copyright (C) 2023 Steve Sprang +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 3 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program. If not, see . + +(define-module (nybble reader commonmark+scm) + #:use-module (nybble sxml) + #:use-module (commonmark) + #:use-module (haunt post) + #:use-module (haunt reader) + #:use-module (ice-9 rdelim) + #:export (commonmark+scm-reader)) + +;;; This is a custom reader for markdown with embedded scheme (to generate +;;; custom SXML). After the normal markdown metadata section, we look for +;;; another section that defines custom scheme code. The remainder of the file +;;; is then normal markdown with interspersed scheme blocks between lines +;;; consisting of "%%%". +;;; +;;; Example content: +;;; +;;; Metadata +;;; --- +;;; Preamble: Scheme code importing modules and defining functions. +;;; --- +;;; Markdown +;;; %%% +;;; Code block +;;; %%% +;;; Markdown + +(define %begin-quasiquote "(quasiquote (") +(define %end-quasiquote "))") +(define %empty-buffer "") + +(define (quasiquote-string str) + (string-append %begin-quasiquote str %end-quasiquote)) + +(define (read-preamble port) + (let loop ((preamble %empty-buffer)) + (let ((line (read-line port))) + (cond + ((eof-object? line) (error "End of file while reading preamble: " + (port-filename port))) + ((string=? "---" line) preamble) ; end of preamble + (else (loop (string-append preamble line "\n"))))))) + +(define (commonmark+scm->sxml port) + (let loop ((sxml '()) (buffer %empty-buffer) (in-code-block #f)) + (let ((line (read-line port))) + (cond + ((eof-object? line) + (when in-code-block + (error "End of file while reading code block: " (port-filename port))) + ;; finished reading file: return the reversed list of sxml + (reverse (cons (commonmark->sxml (string-trim-both buffer)) sxml))) + ((string-prefix? "%%%" line) + (if in-code-block + ;; done reading code block: prepend evaluated code to sxml + (loop (cons (eval-string (quasiquote-string buffer)) sxml) %empty-buffer #f) + ;; begin reading code block: prepend accumulated markdown to sxml + (loop (cons (commonmark->sxml (string-trim-both buffer)) sxml) %empty-buffer #t))) + (else + ;; accumulate input until next %%% line or EOF + (loop sxml (string-append buffer line "\n") in-code-block)))))) + +(define commonmark+scm-reader + (make-reader (make-file-extension-matcher "scmd") + (lambda (file) + (call-with-input-file file + (lambda (port) + (let ((metadata (read-metadata-headers port)) + (preamble (read-preamble port))) + (eval-string preamble) + (values metadata + (process-sxml-text (commonmark+scm->sxml port))))))))) diff --git a/nybble/sxml.scm b/nybble/sxml.scm new file mode 100644 index 0000000..f4e9860 --- /dev/null +++ b/nybble/sxml.scm @@ -0,0 +1,61 @@ +;;; Copyright (C) 2023 Steve Sprang +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 3 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program. If not, see . + +(define-module (nybble sxml) + #:use-module (nybble typography) + #:use-module (ice-9 match) + #:use-module (srfi srfi-26) + #:use-module (sxml transform) + #:export (process-sxml-text + test-text-processing)) + +(define %verbatim-elements + '(code + kbd + pre)) + +(define (verbatim-element? tag) + (pair? (memq tag %verbatim-elements))) + +(define (process-tag tag attrs body f) + (let ([f (if (verbatim-element? tag) identity f)]) + (append (list tag) + (if (pair? attrs) `((@ ,@attrs)) '()) + (map (cut process-sxml-text <> f) body)))) + +(define* (process-sxml-text tree #:optional (f smart-text)) + (match tree + (((? symbol? tag) ('@ attrs ...) body ...) + (process-tag tag attrs body f)) + (((? symbol? tag) body ...) + (process-tag tag '() body f)) + ((nodes ...) + (map (cut process-sxml-text <> f) nodes)) + ((? string? text) + (f text)))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Alternative Approach +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (sxml-identity . args) args) + +;; how to ignore verbatim elements? +(define (%text-rules f) + `((*text* . ,(lambda (tag str) (f str))) + (*default* . ,sxml-identity))) + +(define* (process-sxml-text-alt sxml #:optional (f smart-text)) + (pre-post-order sxml (%text-rules f))) diff --git a/nybble/typography.scm b/nybble/typography.scm new file mode 100644 index 0000000..897324a --- /dev/null +++ b/nybble/typography.scm @@ -0,0 +1,86 @@ +;;; Copyright (C) 2023 Steve Sprang +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 3 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program. If not, see . + +(define-module (nybble typography) + #:use-module (ice-9 regex) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-26) ; cut + #:export (TeX-text + smart-text + smart-ellipsis + smart-dashes + TeX-style-quotes + smart-quotes)) + +(define (TeX-text text) + ((compose TeX-style-quotes smart-dashes smart-ellipsis) text)) + +; smart-quotes disabled by badrihippo because they were messing with JavaScript +(define (smart-text text) + ((compose smart-dashes smart-ellipsis) text)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Ellipses and Dashes +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define smart-ellipsis + (cut regexp-substitute/global #f "\\.{3}" <> 'pre "…" 'post)) + +(define smart-dashes + (compose + (cut regexp-substitute/global #f "--" <> 'pre "–" 'post) + (cut regexp-substitute/global #f "---" <> 'pre "—" 'post))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; TeX Quotes +;; +;; Precisely controlled quotes, but can be interpreted as code spans when using +;; CommonMark +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define TeX-style-quotes + (compose + (cut regexp-substitute/global #f "`" <> 'pre "‘" 'post) + (cut regexp-substitute/global #f "'" <> 'pre "’" 'post) + (cut regexp-substitute/global #f "``" <> 'pre "“" 'post) + (cut regexp-substitute/global #f "''" <> 'pre "”" 'post))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Smart Quotes +;; +;; Some heuristics that work most of the time. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (%space-before str) + (string-append "(^\|[[:space:]—\\(]+)" str)) + +(define (%space-after str) + (string-append str "($\|[[:space:]—\\)]+)")) + +(define smart-quotes + (compose + ;; single quotes: + (cut regexp-substitute/global #f (%space-before "'") <> 'pre 1 "‘" 'post) + (cut regexp-substitute/global #f (%space-after "'") <> 'pre "’" 1 'post) + ;; double quotes: + (cut regexp-substitute/global #f (%space-before "\"") <> 'pre 1 "“" 'post) + (cut regexp-substitute/global #f (%space-after "\"") <> 'pre "”" 1 'post) + ;; contractions: + (cut regexp-substitute/global #f "([[:alnum:]])'([[:alpha:]])" <> 'pre 1 "’" 2 'post) + ;; decades: + (cut regexp-substitute/global #f "'([0-9]{2})" <> 'pre "’" 1 'post) + ;; triple quotes: + (cut regexp-substitute/global #f "\"'" <> 'pre "“ ‘" 'post) + (cut regexp-substitute/global #f "'\"" <> 'pre "’ ”" 'post)))