Add parser and handle tabs
This commit is contained in:
parent
80a1d2d770
commit
004fa3ef89
|
@ -6,6 +6,7 @@ godir=$(libdir)/guile/$(GUILE_EFFECTIVE_VERSION)/ccache
|
|||
SOURCES = \
|
||||
commonmark/utils.scm \
|
||||
commonmark/common.scm \
|
||||
commonmark/parser.scm \
|
||||
commonmark/entities.scm \
|
||||
commonmark/node.scm \
|
||||
commonmark/blocks.scm \
|
||||
|
|
|
@ -18,69 +18,13 @@
|
|||
(define-module (commonmark blocks)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (srfi srfi-26)
|
||||
#:use-module (ice-9 regex)
|
||||
#:use-module (commonmark node)
|
||||
#:use-module (commonmark utils)
|
||||
#:use-module (commonmark parser)
|
||||
#:use-module (commonmark common)
|
||||
#:export (parse-blocks))
|
||||
|
||||
|
||||
(define re-thematic-break (make-regexp "^ {0,3}((\\* *){3,}|(_ *){3,}|(- *){3,}) *$"))
|
||||
(define re-block-quote (make-regexp "^ {0,3}> ?"))
|
||||
(define re-atx-heading (make-regexp "^ {0,3}(#{1,6})( .*)?$"))
|
||||
(define re-atx-heading-end (make-regexp "^(.* )?#+ *$"))
|
||||
(define re-indented-code-block (make-regexp "^ "))
|
||||
(define re-setext-heading (make-regexp "^ {0,3}(=+|-+) *$"))
|
||||
(define re-empty-line (make-regexp "^ *$"))
|
||||
(define re-fenced-code (make-regexp "^ {0,3}(```+|~~~+)([^`]*)$"))
|
||||
(define re-bullet-list-marker (make-regexp "^ {0,3}([-+*])( +|$)"))
|
||||
(define re-ordered-list-marker (make-regexp "^ {0,3}([0-9]{1,9})([.)])( +|$)"))
|
||||
(define re-link-definition (make-regexp (string-append "^ {0,3}"
|
||||
link-label
|
||||
": *\n? *"
|
||||
link-destination
|
||||
"( +| *\n? *)"
|
||||
link-title
|
||||
"? *(\n|$)")))
|
||||
|
||||
|
||||
(define (block-quote? l)
|
||||
(regexp-exec re-block-quote l))
|
||||
|
||||
(define (atx-heading? l)
|
||||
(regexp-exec re-atx-heading l))
|
||||
|
||||
(define (atx-heading-end? l)
|
||||
(regexp-exec re-atx-heading-end l))
|
||||
|
||||
(define (code-block? l)
|
||||
(regexp-exec re-indented-code-block l))
|
||||
|
||||
(define (empty-line? l)
|
||||
(regexp-exec re-empty-line l))
|
||||
|
||||
(define (thematic-break? line)
|
||||
(regexp-exec re-thematic-break line))
|
||||
|
||||
(define (setext-heading? line)
|
||||
(regexp-exec re-setext-heading line))
|
||||
|
||||
(define (fenced-code? line)
|
||||
(regexp-exec re-fenced-code line))
|
||||
|
||||
(define (fenced-code-end? line fence)
|
||||
(string-match (string-append "^ {0,3}" fence "$") line))
|
||||
|
||||
(define (bullet-list-marker? line)
|
||||
(regexp-exec re-bullet-list-marker line))
|
||||
|
||||
(define (ordered-list-marker? line)
|
||||
(regexp-exec re-ordered-list-marker line))
|
||||
|
||||
(define (link-definition? text)
|
||||
(regexp-exec re-link-definition text))
|
||||
|
||||
|
||||
;; Port -> Document
|
||||
(define (parse-blocks port)
|
||||
"Parses CommonMark blocks from PORT returning a CommonMark Document tree"
|
||||
|
@ -91,75 +35,91 @@
|
|||
(if (null? references)
|
||||
doc
|
||||
(node-add-data doc 'link-references references))))
|
||||
(loop (parse-open-block root line)
|
||||
(loop (parse-open-block root (make-parser line))
|
||||
(read-line-without-nul port)))))
|
||||
|
||||
;; Node String -> Node
|
||||
(define (parse-open-block n l)
|
||||
(cond ((node-closed? n) n)
|
||||
((document-node? n) (parse-container-block n l))
|
||||
((block-quote-node? n) (parse-block-quote n l))
|
||||
((code-block-node? n) (parse-code-block n l))
|
||||
((fenced-code-node? n) (parse-fenced-code n l))
|
||||
((list-node? n) (parse-list n l))
|
||||
((paragraph-node? n) (parse-paragraph n l))))
|
||||
;; Node Parser -> Node
|
||||
(define (parse-open-block node parser)
|
||||
(cond ((node-closed? node) node)
|
||||
((document-node? node) (parse-container-block node parser))
|
||||
((block-quote-node? node) (parse-block-quote node parser))
|
||||
((code-block-node? node) (parse-code-block node parser))
|
||||
((fenced-code-node? node) (parse-fenced-code node parser))
|
||||
((list-node? node) (parse-list node parser))
|
||||
((paragraph-node? node) (parse-paragraph node parser))))
|
||||
|
||||
(define (parse-block-quote n l)
|
||||
(cond ((block-quote? l) => (lambda (rest-line)
|
||||
(parse-container-block n (match:suffix rest-line))))
|
||||
((open-descendant? n 'paragraph) ;; lazy continuation line
|
||||
(let ((parsed-line (parse-line l)))
|
||||
;; Node Parser -> Node
|
||||
(define (parse-container-block node parser)
|
||||
(cond ((and (no-children? node) (empty-line parser)) ;; empty line
|
||||
node)
|
||||
((no-children? node) ;; first line
|
||||
(add-child-node node (parse-line parser)))
|
||||
((and (node-closed? (last-child node)) (not (empty-line parser))) ;; new block
|
||||
(add-child-node node (parse-line parser)))
|
||||
(else (let ((new-child (parse-open-block (last-child node) parser)))
|
||||
(cond ((and (not (empty-line parser))
|
||||
(node-closed? new-child)
|
||||
(not (fenced-code-node? new-child))
|
||||
(not (heading-node? new-child)))
|
||||
(add-child-node (replace-last-child node new-child)
|
||||
(parse-line parser)))
|
||||
(else (replace-last-child node new-child)))))))
|
||||
|
||||
(define (parse-block-quote node parser)
|
||||
(cond ((block-quote (parser-advance-min-spaces parser 3)) => (lambda (rest)
|
||||
(parse-container-block node (block-quote-rest rest))))
|
||||
((open-descendant? node 'paragraph) ;; lazy continuation line
|
||||
(let ((parsed-line (parse-line parser)))
|
||||
(if (or (paragraph-node? parsed-line) (code-block-node? parsed-line))
|
||||
(parse-container-block n l)
|
||||
(close-node n))))
|
||||
(else (close-node n))))
|
||||
(parse-container-block node parser)
|
||||
(close-node node))))
|
||||
(else (close-node node))))
|
||||
|
||||
(define (parse-code-block n l)
|
||||
(cond ((code-block? l) => (lambda (rest-line)
|
||||
(add-child-node n (match:suffix rest-line))))
|
||||
((empty-line? l) (add-child-node n ""))
|
||||
(else (close-node n))))
|
||||
(define (parse-code-block node parser)
|
||||
(let ((nonspace-parser (parser-advance-next-nonspace parser)))
|
||||
(cond ((parser-indented? parser nonspace-parser)
|
||||
(add-child-node node (parser-rest-str (parser-advance parser code-indent))))
|
||||
((empty-line parser) (add-child-node node ""))
|
||||
(else (close-node node)))))
|
||||
|
||||
(define (parse-paragraph n l)
|
||||
(let ((parsed-line (parse-line l)))
|
||||
(define (parse-paragraph node parser)
|
||||
(let ((parsed-line (parse-line parser)))
|
||||
(cond ((blank-node? parsed-line)
|
||||
(close-node n))
|
||||
((and (setext-heading? l) (= (length (node-children (last-child n))) 1))
|
||||
(make-heading-node (last-child (last-child n))
|
||||
(if (string-any #\= l) 1 2)))
|
||||
(close-node node))
|
||||
((and (setext-heading (parser-advance-min-spaces parser 3)) (= (length (node-children (last-child node))) 1))
|
||||
(make-heading-node (last-child (last-child node))
|
||||
(if (string-any #\= (parser-rest-str parser)) 1 2)))
|
||||
((paragraph-node? parsed-line)
|
||||
(replace-last-child n (join-text-nodes (last-child n) (last-child parsed-line))))
|
||||
(replace-last-child node (join-text-nodes (last-child node) (last-child parsed-line))))
|
||||
((code-block-node? parsed-line)
|
||||
(replace-last-child n (add-text (last-child n) l)))
|
||||
(else (close-node n)))))
|
||||
(replace-last-child node (add-text (last-child node) (parser-rest-str parser))))
|
||||
(else (close-node node)))))
|
||||
|
||||
(define (remove-min-spaces l n)
|
||||
(let ((space-end (string-index l (lambda (c) (not (eq? #\space c))))))
|
||||
(if space-end
|
||||
(substring l (min n space-end))
|
||||
(substring l (min n (string-length l))))))
|
||||
(define (fence-start node)
|
||||
(node-get-data node 'fence-start))
|
||||
|
||||
(define (fence-start n)
|
||||
(node-get-data n 'fence-start))
|
||||
(define (fence-type node)
|
||||
(node-get-data node 'fence))
|
||||
|
||||
(define (parse-fenced-code n l)
|
||||
(cond ((fenced-code-end? l (node-get-data n 'fence))
|
||||
(close-node n))
|
||||
((no-children? n)
|
||||
(add-child-node n (remove-min-spaces l (fence-start n))))
|
||||
(else (replace-last-child n
|
||||
(string-append (last-child n)
|
||||
"\n"
|
||||
(remove-min-spaces l (fence-start n)))))))
|
||||
(define (parse-fenced-code node parser)
|
||||
(cond ((fenced-code-end (parser-advance-min-spaces parser 3) (fence-type node))
|
||||
(close-node node))
|
||||
((no-children? node)
|
||||
(add-child-node node (parser-rest-str (parser-advance-min-spaces parser (fence-start node)))))
|
||||
(else (replace-last-child
|
||||
node
|
||||
(string-append (last-child node)
|
||||
"\n"
|
||||
(parser-rest-str (parser-advance-min-spaces parser (fence-start node))))))))
|
||||
|
||||
(define (list-type n)
|
||||
(node-get-data n 'type))
|
||||
(define (list-type node)
|
||||
(node-get-data node 'type))
|
||||
|
||||
(define (list-bullet n)
|
||||
(node-get-data n 'bullet))
|
||||
(define (list-bullet node)
|
||||
(node-get-data node 'bullet))
|
||||
|
||||
(define (list-delimiter n)
|
||||
(node-get-data n 'delimiter))
|
||||
(define (list-delimiter node)
|
||||
(node-get-data node 'delimiter))
|
||||
|
||||
(define (eq-list-types? l1 l2)
|
||||
(or (and (eq? (list-type l1) (list-type l2) 'bullet)
|
||||
|
@ -167,31 +127,31 @@
|
|||
(and (eq? (list-type l1) (list-type l2) 'ordered)
|
||||
(string=? (list-delimiter l1) (list-delimiter l2)))))
|
||||
|
||||
(define (parse-list n l)
|
||||
(define (parse-list node parser)
|
||||
(define (remove-blank-node item)
|
||||
(if (blank-last-child? item)
|
||||
(remove-last-child item)
|
||||
item))
|
||||
|
||||
(define (parse-new-item n l item)
|
||||
(let ((new-list (parse-line l)))
|
||||
(define (parse-new-item node parser item)
|
||||
(let ((new-list (parse-line parser)))
|
||||
(cond ((and (list-node? new-list)
|
||||
(eq-list-types? n new-list))
|
||||
(add-child-node (replace-last-child n item)
|
||||
(eq-list-types? node new-list))
|
||||
(add-child-node (replace-last-child node item)
|
||||
(last-child new-list)))
|
||||
(else (close-node (replace-last-child n (remove-blank-node item)))))))
|
||||
(else (close-node (replace-last-child node (remove-blank-node item)))))))
|
||||
|
||||
(if (node-closed? (last-child n))
|
||||
(parse-new-item n l (last-child n))
|
||||
(let ((item (parse-item (last-child n) l)))
|
||||
(cond ((and (node-closed? item) (empty-line? l))
|
||||
(close-node (replace-last-child n item)))
|
||||
(if (node-closed? (last-child node))
|
||||
(parse-new-item node parser (last-child node))
|
||||
(let ((item (parse-item (last-child node) parser)))
|
||||
(cond ((and (node-closed? item) (empty-line parser))
|
||||
(close-node (replace-last-child node item)))
|
||||
((node-closed? item)
|
||||
(parse-new-item n l item))
|
||||
(else (replace-last-child n item))))))
|
||||
(parse-new-item node parser item))
|
||||
(else (replace-last-child node item))))))
|
||||
|
||||
(define (n-spaces? n s)
|
||||
(>= (string-index s (lambda (c) (not (char=? #\space c)))) n))
|
||||
(define (n-spaces? n str)
|
||||
(>= (string-index str (lambda (c) (not (char=? #\space c)))) n))
|
||||
|
||||
(define (item-padding node)
|
||||
(node-get-data node 'padding))
|
||||
|
@ -199,155 +159,146 @@
|
|||
(define (blank-last-child? node)
|
||||
(and (not (no-children? node)) (blank-node? (last-child node))))
|
||||
|
||||
(define (parse-item n l)
|
||||
(let ((padding (item-padding n)))
|
||||
(cond ((and (blank-last-child? n) (= (length (node-children n)) 1))
|
||||
(close-node n))
|
||||
((and (empty-line? l) (no-children? n))
|
||||
(add-child-node n (make-blank-node)))
|
||||
((and (empty-line? l) (blank-node? (last-child n)))
|
||||
(close-node (remove-last-child n)))
|
||||
((empty-line? l)
|
||||
(cond ((open-descendant? n 'fenced-code)
|
||||
(parse-container-block n l))
|
||||
((list-node? (last-child n))
|
||||
(let ((container (parse-container-block n l)))
|
||||
(define (parse-item node parser)
|
||||
(let ((padding (item-padding node)))
|
||||
(cond ((and (blank-last-child? node) (= (length (node-children node)) 1))
|
||||
(close-node node))
|
||||
((and (empty-line parser) (no-children? node))
|
||||
(add-child-node node (make-blank-node)))
|
||||
((and (empty-line parser) (blank-node? (last-child node)))
|
||||
(close-node (remove-last-child node)))
|
||||
((empty-line parser)
|
||||
(cond ((open-descendant? node 'fenced-code)
|
||||
(parse-container-block node parser))
|
||||
((list-node? (last-child node))
|
||||
(let ((container (parse-container-block node parser)))
|
||||
(if (node-closed? (last-child container))
|
||||
(close-node container)
|
||||
(add-child-node container (make-blank-node)))))
|
||||
((open-descendant? n 'code-block)
|
||||
(add-child-node (parse-container-block n l)
|
||||
((open-descendant? node 'code-block)
|
||||
(add-child-node (parse-container-block node parser)
|
||||
(make-blank-node)))
|
||||
(else (add-child-node (replace-last-child n (close-node (last-child n)))
|
||||
(else (add-child-node (replace-last-child node (close-node (last-child node)))
|
||||
(make-blank-node)))))
|
||||
((n-spaces? padding l)
|
||||
(let* ((new-line (substring l padding))
|
||||
(node (if (and (blank-last-child? n) (or (prev-node? n 'code-block) (prev-node? n 'list)))
|
||||
(remove-last-child n)
|
||||
n))
|
||||
(new-item (parse-container-block node new-line)))
|
||||
(if (and (blank-last-child? n) (prev-node? new-item 'list) (prev-node-closed? new-item))
|
||||
(add-child-node (replace-last-child new-item (last-child n)) (last-child new-item))
|
||||
((n-spaces? padding (parser-rest-str parser))
|
||||
(let* ((parser (parser-advance parser padding))
|
||||
(n (if (and (blank-last-child? node) (or (prev-node? node 'code-block) (prev-node? node 'list)))
|
||||
(remove-last-child node)
|
||||
node))
|
||||
(new-item (parse-container-block n parser)))
|
||||
(if (and (blank-last-child? node) (prev-node? new-item 'list) (prev-node-closed? new-item))
|
||||
(add-child-node (replace-last-child new-item (last-child node)) (last-child new-item))
|
||||
new-item)))
|
||||
((or (no-children? n) (blank-node? (last-child n)))
|
||||
(close-node n))
|
||||
((open-descendant? n 'paragraph)
|
||||
(let ((parsed-line (parse-line (remove-min-spaces l padding))))
|
||||
((or (no-children? node) (blank-node? (last-child node)))
|
||||
(close-node node))
|
||||
((open-descendant? node 'paragraph)
|
||||
(let ((parsed-line (parse-line (parser-advance-min-spaces parser padding))))
|
||||
(if (paragraph-node? parsed-line)
|
||||
(parse-container-block n l)
|
||||
(close-node (replace-last-child n (close-node (last-child n)))))))
|
||||
(else (close-node (replace-last-child n (close-node (last-child n))))))))
|
||||
(parse-container-block node parser)
|
||||
(close-node (replace-last-child node (close-node (last-child node)))))))
|
||||
(else (close-node (replace-last-child node (close-node (last-child node))))))))
|
||||
|
||||
;; Node String -> Node
|
||||
(define (parse-container-block n l)
|
||||
(cond ((or (no-children? n) (and (node-closed? (last-child n)) (not (empty-line? l))))
|
||||
(if (empty-line? l) n (add-child-node n (parse-line l))))
|
||||
(else (let ((new-child (parse-open-block (last-child n) l)))
|
||||
(cond ((and (not (empty-line? l))
|
||||
(node-closed? new-child)
|
||||
(not (fenced-code-node? new-child))
|
||||
(not (heading-node? new-child)))
|
||||
(add-child-node (replace-last-child n new-child)
|
||||
(parse-line l)))
|
||||
(else (replace-last-child n new-child)))))))
|
||||
|
||||
(define (heading-level s)
|
||||
(string-length s))
|
||||
(define (heading-level str)
|
||||
(string-length str))
|
||||
|
||||
;; String -> Node
|
||||
(define (parse-line l)
|
||||
(cond ((empty-line? l) (make-blank-node))
|
||||
((thematic-break? l) (make-thematic-break))
|
||||
((block-quote? l) => make-block-quote)
|
||||
((atx-heading? l) => make-atx-heading)
|
||||
((code-block? l) => make-code-block)
|
||||
((fenced-code? l) => make-fenced-code)
|
||||
((bullet-list-marker? l) => make-bullet-list-marker)
|
||||
((ordered-list-marker? l) => make-ordered-list-marker)
|
||||
(else (make-paragraph l))))
|
||||
;; Parser -> Node
|
||||
(define (parse-line parser)
|
||||
(let ((nonspace-parser (parser-advance-next-nonspace parser)))
|
||||
(cond ((empty-line nonspace-parser) (make-blank-node))
|
||||
((parser-indented? parser nonspace-parser) (make-code-block parser))
|
||||
((thematic-break nonspace-parser) (make-thematic-break))
|
||||
((block-quote nonspace-parser) => make-block-quote)
|
||||
((atx-heading nonspace-parser) => make-atx-heading)
|
||||
((fenced-code nonspace-parser) => make-fenced-code)
|
||||
((bullet-list-marker nonspace-parser) => (cut make-bullet-list-marker parser <>))
|
||||
((ordered-list-marker nonspace-parser) => (cut make-ordered-list-marker parser <>))
|
||||
(else (make-paragraph nonspace-parser)))))
|
||||
|
||||
|
||||
(define (make-thematic-break)
|
||||
(make-thematic-break-node))
|
||||
|
||||
(define (make-block-quote match)
|
||||
(make-block-quote-node (parse-line (match:suffix match))))
|
||||
(make-block-quote-node (parse-line (block-quote-rest match))))
|
||||
|
||||
(define (make-atx-heading match)
|
||||
(let* ((text (or (match:substring match 2) ""))
|
||||
(end (atx-heading-end? text)))
|
||||
(if end
|
||||
(make-heading-node (or (match:substring end 1) "") (heading-level (match:substring match 1)))
|
||||
(make-heading-node text (heading-level (match:substring match 1))))))
|
||||
(make-heading-node (atx-heading-content match)
|
||||
(heading-level (atx-heading-opening match))))
|
||||
|
||||
(define (make-code-block match)
|
||||
(make-code-block-node (match:suffix match)))
|
||||
(define (make-code-block parser)
|
||||
(make-code-block-node (parser-rest-str (parser-advance parser code-indent))))
|
||||
|
||||
(define (make-fenced-code match)
|
||||
(make-fenced-code-node
|
||||
`((fence . ,(match:substring match 1))
|
||||
(fence-start . ,(match:start match 1))
|
||||
(info-string . ,(unescape-string (string-trim-both (match:substring match 2)))))))
|
||||
`((fence . ,(fenced-code-fence match))
|
||||
(fence-start . ,(fenced-code-start match))
|
||||
(info-string . ,(unescape-string (string-trim-both (fenced-code-info-string match)))))))
|
||||
|
||||
(define (make-bullet-list-marker match)
|
||||
(make-list-node (make-item (match:suffix match) (match:end match 1) (match:substring match 2))
|
||||
`((type . bullet)
|
||||
(tight . #t)
|
||||
(bullet . ,(match:substring match 1)))))
|
||||
(define (make-bullet-list-marker parser match)
|
||||
(let ((rest-parser (bullet-list-rest parser match)))
|
||||
(make-list-node (make-item rest-parser
|
||||
(bullet-list-offset parser rest-parser)
|
||||
(bullet-list-spaces rest-parser))
|
||||
`((type . bullet)
|
||||
(tight . #t)
|
||||
(bullet . ,(bullet-list-bullet match))))))
|
||||
|
||||
(define (make-ordered-list-marker match)
|
||||
(make-list-node (make-item (match:suffix match) (match:end match 2) (match:substring match 3))
|
||||
`((type . ordered)
|
||||
(start . ,(string->number (match:substring match 1)))
|
||||
(tight . #t)
|
||||
(delimiter . ,(match:substring match 2)))))
|
||||
(define (make-ordered-list-marker parser match)
|
||||
(let ((rest-parser (ordered-list-rest parser match)))
|
||||
(make-list-node (make-item rest-parser
|
||||
(ordered-list-offset parser rest-parser)
|
||||
(ordered-list-spaces rest-parser))
|
||||
`((type . ordered)
|
||||
(start . ,(ordered-list-number match))
|
||||
(tight . #t)
|
||||
(delimiter . ,(ordered-list-delimiter match))))))
|
||||
|
||||
(define (make-item line offset spaces)
|
||||
(let ((padding (string-length spaces)))
|
||||
(cond ((>= padding 5)
|
||||
(make-item-node (parse-line (string-append (substring spaces 1) line))
|
||||
(+ offset 1)))
|
||||
((< padding 1)
|
||||
(make-item-node #f (+ offset 1)))
|
||||
(else (make-item-node (parse-line line) (+ offset padding))))))
|
||||
(define (make-item parser width spaces)
|
||||
(cond ((>= spaces 5)
|
||||
(make-item-node (parse-line (parser-advance parser 1))
|
||||
(+ width 1)))
|
||||
((< spaces 1)
|
||||
(make-item-node #f (+ width 1)))
|
||||
(else (make-item-node (parse-line (parser-advance parser spaces)) (+ width spaces)))))
|
||||
|
||||
(define (make-paragraph line)
|
||||
(make-paragraph-node line))
|
||||
(define (make-paragraph parser)
|
||||
(make-paragraph-node (parser-rest-str parser)))
|
||||
|
||||
(define (parse-clean-up n col)
|
||||
(cond ((not (node? n)) (col n '()))
|
||||
((code-block-node? n) (remove-empty-lines n col))
|
||||
((paragraph-node? n) (parse-reference-definition n col))
|
||||
((list-node? n) (clean-list-nodes n col))
|
||||
((item-node? n) (remove-blank-nodes n col))
|
||||
(else (filter-map&co parse-clean-up (node-children n)
|
||||
(define (parse-clean-up node col)
|
||||
(cond ((not (node? node)) (col node '()))
|
||||
((code-block-node? node) (remove-empty-lines node col))
|
||||
((paragraph-node? node) (parse-reference-definition node col))
|
||||
((list-node? node) (clean-list-nodes node col))
|
||||
((item-node? node) (remove-blank-nodes node col))
|
||||
(else (filter-map&co parse-clean-up (node-children node)
|
||||
(lambda (v d)
|
||||
(col (make-node (node-type n)
|
||||
(node-data n)
|
||||
(col (make-node (node-type node)
|
||||
(node-data node)
|
||||
v)
|
||||
d))))))
|
||||
|
||||
(define (reverse-join ls)
|
||||
(reduce (cut string-append <> "\n" <>) "" ls))
|
||||
|
||||
(define (parse-reference-definition n col)
|
||||
(let loop ((text (reverse-join (node-children (last-child n))))
|
||||
(define (parse-reference-definition node col)
|
||||
(let loop ((text (reverse-join (node-children (last-child node))))
|
||||
(links '()))
|
||||
(cond ((link-definition? text) =>
|
||||
(cond ((link-definition text) =>
|
||||
(lambda (match)
|
||||
(loop (match:suffix match)
|
||||
(cons (list (string-map char-downcase (match:substring match 1))
|
||||
(match:substring match 3)
|
||||
(match:substring match 7))
|
||||
(loop (link-definition-rest match)
|
||||
(cons (list (string-map char-downcase (link-definition-label match))
|
||||
(link-definition-destination match)
|
||||
(link-definition-title match))
|
||||
links))))
|
||||
(else (col (if (= (string-length text) 0)
|
||||
#f
|
||||
(make-paragraph text)) links)))))
|
||||
(make-paragraph-node text)) links)))))
|
||||
|
||||
(define (remove-empty-lines n col)
|
||||
(col (make-node (node-type n) (node-data n)
|
||||
(list (reverse-join (drop-while empty-line?
|
||||
(node-children n)))))
|
||||
(define (remove-empty-lines node col)
|
||||
(col (make-node (node-type node) (node-data node)
|
||||
(list (reverse-join (drop-while (cut string-every char-set:blank <>)
|
||||
(node-children node)))))
|
||||
'()))
|
||||
|
||||
(define (clean-list-nodes node col)
|
||||
|
@ -364,11 +315,11 @@
|
|||
v)
|
||||
d)))))
|
||||
|
||||
(define (remove-blank-nodes n col)
|
||||
(cond ((no-children? n) (col n '()))
|
||||
(else (filter-map&co parse-clean-up (filter (negate blank-node?) (node-children n))
|
||||
(define (remove-blank-nodes node col)
|
||||
(cond ((no-children? node) (col node '()))
|
||||
(else (filter-map&co parse-clean-up (filter (negate blank-node?) (node-children node))
|
||||
(lambda (v d)
|
||||
(col (make-node (node-type n)
|
||||
(node-data n)
|
||||
(col (make-node (node-type node)
|
||||
(node-data node)
|
||||
v)
|
||||
d))))))
|
||||
|
|
|
@ -0,0 +1,245 @@
|
|||
;; Copyright (C) 2016 Erik Edrosa <erik.edrosa@gmail.com>
|
||||
;;
|
||||
;; This file is part of guile-commonmark
|
||||
;;
|
||||
;; guile-commonmark is free software: you can redistribute it and/or
|
||||
;; modify it under the terms of the GNU Lesser General Public License
|
||||
;; as published by the Free Software Foundation, either version 3 of
|
||||
;; the License, or (at your option) any later version.
|
||||
;;
|
||||
;; guile-commonmark is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU Lesser General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU Lesser General Public License
|
||||
;; along with guile-commonmark. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
(define-module (commonmark parser)
|
||||
#:use-module (srfi srfi-9)
|
||||
#:use-module (ice-9 regex)
|
||||
#:use-module (commonmark common)
|
||||
#:export (make-parser
|
||||
parser-str
|
||||
parser-pos
|
||||
parser-col
|
||||
parser-end?
|
||||
parser-advance
|
||||
parser-advance-next-nonspace
|
||||
parser-advance-min-spaces
|
||||
code-indent
|
||||
parser-indented?
|
||||
parser-rest-str
|
||||
block-quote
|
||||
block-quote-rest
|
||||
atx-heading
|
||||
atx-heading-content
|
||||
atx-heading-opening
|
||||
code-block
|
||||
empty-line
|
||||
thematic-break
|
||||
setext-heading
|
||||
fenced-code
|
||||
fenced-code-fence
|
||||
fenced-code-start
|
||||
fenced-code-info-string
|
||||
fenced-code-end
|
||||
bullet-list-marker
|
||||
bullet-list-rest
|
||||
bullet-list-offset
|
||||
bullet-list-spaces
|
||||
bullet-list-bullet
|
||||
ordered-list-marker
|
||||
ordered-list-rest
|
||||
ordered-list-offset
|
||||
ordered-list-spaces
|
||||
ordered-list-number
|
||||
ordered-list-delimiter
|
||||
link-definition
|
||||
link-definition-rest
|
||||
link-definition-label
|
||||
link-definition-destination
|
||||
link-definition-title))
|
||||
|
||||
(define-record-type <parser>
|
||||
(%make-parser str pos col)
|
||||
parser?
|
||||
(str parser-str)
|
||||
(pos parser-pos)
|
||||
(col parser-col))
|
||||
|
||||
(define (make-parser str)
|
||||
(%make-parser str 0 0))
|
||||
|
||||
(define (parser-char=? parser ch)
|
||||
(char=? (string-ref (parser-str parser) (parser-pos parser))
|
||||
ch))
|
||||
|
||||
(define (parser-end? parser)
|
||||
(>= (parser-pos parser) (string-length (parser-str parser))))
|
||||
|
||||
(define (parser-advance parser offset)
|
||||
(let ((str (parser-str parser)))
|
||||
(let loop ((pos (parser-pos parser))
|
||||
(col (parser-col parser))
|
||||
(count offset))
|
||||
(cond ((>= pos (string-length str))
|
||||
(%make-parser str pos col))
|
||||
((<= count 0)
|
||||
(%make-parser str pos col))
|
||||
((char=? (string-ref str pos) #\tab)
|
||||
(let ((col-change (- 4 (modulo col 4))))
|
||||
(if (>= count col-change)
|
||||
(loop (+ pos 1) (+ col col-change) (- count col-change))
|
||||
(%make-parser str pos (+ col count)))))
|
||||
(else (loop (+ pos 1) (+ col 1) (- count 1)))))))
|
||||
|
||||
(define (parser-advance-next-nonspace parser)
|
||||
(let ((str (parser-str parser)))
|
||||
(let loop ((pos (parser-pos parser))
|
||||
(col (parser-col parser)))
|
||||
(if (>= pos (string-length str))
|
||||
(%make-parser str pos col)
|
||||
(case (string-ref str pos)
|
||||
((#\space) (loop (+ pos 1) (+ col 1)))
|
||||
((#\tab) (loop (+ pos 1) (+ col (- 4 (modulo col 4)))))
|
||||
(else (%make-parser str pos col)))))))
|
||||
|
||||
(define (parser-advance-min-spaces parser n)
|
||||
(let ((str (parser-str parser)))
|
||||
(let loop ((pos (parser-pos parser))
|
||||
(col (parser-col parser))
|
||||
(count n))
|
||||
(cond ((>= pos (string-length str))
|
||||
(%make-parser str pos col))
|
||||
((<= count 0)
|
||||
(%make-parser str pos col))
|
||||
((char=? (string-ref str pos) #\space)
|
||||
(loop (+ pos 1) (+ col 1) (- count 1)))
|
||||
((char=? (string-ref str pos) #\tab)
|
||||
(let ((col-change (- 4 (modulo col 4))))
|
||||
(if (>= count col-change)
|
||||
(loop (+ pos 1) (+ col col-change) (- count col-change))
|
||||
(%make-parser str pos (+ col count)))))
|
||||
(else (%make-parser str pos col))))))
|
||||
|
||||
(define code-indent 4)
|
||||
|
||||
(define (parser-indented? start end)
|
||||
(>= (- (parser-col end) (parser-col start)) code-indent))
|
||||
|
||||
(define (parser-rest-str parser)
|
||||
(substring (parser-str parser) (parser-pos parser)))
|
||||
|
||||
(define re-thematic-break (make-regexp "^((\\*[ \t]*){3,}|(_[ \t]*){3,}|(-[ \t]*){3,})[ \t]*$"))
|
||||
(define re-atx-heading (make-regexp "^(#{1,6})([ \t]+|$)"))
|
||||
(define re-atx-heading-end (make-regexp "([ \t]+#+[ \t]*)$|(^#+[ \t]*)$"))
|
||||
(define re-setext-heading (make-regexp "^(=+|-+) *$"))
|
||||
(define re-empty-line (make-regexp "^[ \t]*$"))
|
||||
(define re-fenced-code (make-regexp "^(```+|~~~+)([^`]*)$"))
|
||||
(define re-bullet-list-marker (make-regexp "^([-+*])([ \t]|$)"))
|
||||
(define re-ordered-list-marker (make-regexp "^([0-9]{1,9})([.)])([ \t]|$)"))
|
||||
(define re-link-definition (make-regexp (string-append "^"
|
||||
link-label
|
||||
": *\n? *"
|
||||
link-destination
|
||||
"( +| *\n? *)"
|
||||
link-title
|
||||
"? *(\n|$)")))
|
||||
|
||||
|
||||
(define (block-quote parser)
|
||||
(if (and (not (parser-end? parser)) (parser-char=? parser #\>))
|
||||
(parser-advance parser 1)
|
||||
#f))
|
||||
|
||||
(define (block-quote-rest parser)
|
||||
(if (and (not (parser-end? parser))
|
||||
(or (parser-char=? parser #\space)
|
||||
(parser-char=? parser #\tab)))
|
||||
(parser-advance parser 1)
|
||||
parser))
|
||||
|
||||
(define (atx-heading parser)
|
||||
(regexp-exec re-atx-heading (parser-str parser) (parser-pos parser)))
|
||||
|
||||
(define (atx-heading-content match)
|
||||
(let ((end-match (regexp-exec re-atx-heading-end (match:suffix match))))
|
||||
(if end-match
|
||||
(match:prefix end-match)
|
||||
(match:suffix match))))
|
||||
|
||||
(define (atx-heading-opening match)
|
||||
(match:substring match 1))
|
||||
|
||||
(define (fenced-code-fence match)
|
||||
(match:substring match 1))
|
||||
|
||||
(define (fenced-code-start match)
|
||||
(match:start match 1))
|
||||
|
||||
(define (fenced-code-info-string match)
|
||||
(match:substring match 2))
|
||||
|
||||
(define (empty-line parser)
|
||||
(regexp-exec re-empty-line (parser-str parser) (parser-pos parser)))
|
||||
|
||||
(define (thematic-break parser)
|
||||
(regexp-exec re-thematic-break (parser-str parser) (parser-pos parser)))
|
||||
|
||||
(define (setext-heading parser)
|
||||
(regexp-exec re-setext-heading (parser-str parser) (parser-pos parser)))
|
||||
|
||||
(define (fenced-code parser)
|
||||
(regexp-exec re-fenced-code (parser-str parser) (parser-pos parser)))
|
||||
|
||||
(define (fenced-code-end parser fence)
|
||||
(string-match (string-append "^" fence "$") (parser-str parser) (parser-pos parser)))
|
||||
|
||||
(define (bullet-list-marker parser)
|
||||
(regexp-exec re-bullet-list-marker (parser-str parser) (parser-pos parser)))
|
||||
|
||||
(define (bullet-list-rest parser match)
|
||||
(parser-advance parser (- (match:end match 1) (parser-col parser))))
|
||||
|
||||
(define (bullet-list-offset parser parser-rest)
|
||||
(- (parser-col parser-rest) (parser-col parser)))
|
||||
|
||||
(define (bullet-list-spaces parser)
|
||||
(- (parser-col (parser-advance-next-nonspace parser)) (parser-col parser)))
|
||||
|
||||
(define (bullet-list-bullet match)
|
||||
(match:substring match 1))
|
||||
|
||||
(define (ordered-list-marker parser)
|
||||
(regexp-exec re-ordered-list-marker (parser-str parser) (parser-pos parser)))
|
||||
|
||||
(define (ordered-list-rest parser match)
|
||||
(parser-advance parser (- (match:end match 2) (parser-col parser))))
|
||||
|
||||
(define (ordered-list-offset parser parser-rest)
|
||||
(- (parser-col parser-rest) (parser-col parser)))
|
||||
|
||||
(define (ordered-list-spaces parser)
|
||||
(- (parser-col (parser-advance-next-nonspace parser)) (parser-col parser)))
|
||||
|
||||
(define (ordered-list-number match)
|
||||
(string->number (match:substring match 1)))
|
||||
|
||||
(define (ordered-list-delimiter match)
|
||||
(match:substring match 2))
|
||||
|
||||
(define (link-definition str)
|
||||
(regexp-exec re-link-definition str))
|
||||
|
||||
(define (link-definition-rest match)
|
||||
(match:suffix match))
|
||||
|
||||
(define (link-definition-label match)
|
||||
(match:substring match 1))
|
||||
|
||||
(define (link-definition-destination match)
|
||||
(match:substring match 3))
|
||||
|
||||
(define (link-definition-title match)
|
||||
(match:substring match 7))
|
Loading…
Reference in New Issue