Some checks are pending
Test, Build, and Deploy / test-build-deploy (push) Waiting to run
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
450 lines
11 KiB
Plaintext
450 lines
11 KiB
Plaintext
;; content-on-sx — Markdown import adapter (markdown text -> block document).
|
|
;;
|
|
;; A line-based parser, the inverse of markdown.sx's asMarkdown. Confined to the
|
|
;; adapter boundary: the core knows nothing about Markdown. Handles a leading
|
|
;; --- frontmatter block (key: value -> doc metadata), ATX headings (#..######),
|
|
;; fenced code (```lang), blockquotes (> ), unordered (- / * ) and ordered (1. )
|
|
;; lists, thematic breaks (--- / ***), pipe tables (header + --- separator +
|
|
;; body), and paragraphs (consecutive plain lines joined with a space). Block ids
|
|
;; are assigned sequentially b0,b1…
|
|
;;
|
|
;; Requires (loaded by harness): block.sx, doc.sx, table.sx (mk-table),
|
|
;; meta.sx (doc-with-meta); markdown.sx for the adapter's export side.
|
|
|
|
(define md/-id (fn (i) (str "b" i)))
|
|
(define md/-blank? (fn (s) (= s "")))
|
|
(define md/-hr? (fn (s) (if (= s "---") true (= s "***"))))
|
|
|
|
(define
|
|
ct-in?
|
|
(fn
|
|
(x xs)
|
|
(cond
|
|
((= (len xs) 0) false)
|
|
((= (first xs) x) true)
|
|
(else (ct-in? x (rest xs))))))
|
|
|
|
(define
|
|
ct-starts-with?
|
|
(fn
|
|
(s prefix)
|
|
(and
|
|
(>= (string-length s) (string-length prefix))
|
|
(= (substring s 0 (string-length prefix)) prefix))))
|
|
|
|
(define
|
|
md/-drop
|
|
(fn (s prefix) (substring s (string-length prefix) (string-length s))))
|
|
|
|
(define
|
|
md/-drop-n
|
|
(fn
|
|
(xs n)
|
|
(if
|
|
(= n 0)
|
|
xs
|
|
(if
|
|
(= (len xs) 0)
|
|
xs
|
|
(md/-drop-n (rest xs) (- n 1))))))
|
|
|
|
(define
|
|
md/-join-with
|
|
(fn
|
|
(sep parts)
|
|
(cond
|
|
((= (len parts) 0) "")
|
|
((= (len parts) 1) (first parts))
|
|
(else (str (first parts) sep (md/-join-with sep (rest parts)))))))
|
|
(define md/-join-sp (fn (parts) (md/-join-with " " parts)))
|
|
(define md/-join-nl (fn (parts) (md/-join-with (str "\n") parts)))
|
|
|
|
;; ── heading detection (leading #s then a space) ──
|
|
(define
|
|
md/-hashes
|
|
(fn
|
|
(s n)
|
|
(if
|
|
(and
|
|
(< n (string-length s))
|
|
(= (substring s n (+ n 1)) "#"))
|
|
(md/-hashes s (+ n 1))
|
|
n)))
|
|
(define
|
|
md/-heading?
|
|
(fn
|
|
(line)
|
|
(let
|
|
((n (md/-hashes line 0)))
|
|
(and
|
|
(> n 0)
|
|
(<= n 6)
|
|
(> (string-length line) n)
|
|
(= (substring line n (+ n 1)) " ")))))
|
|
(define
|
|
md/-heading-block
|
|
(fn
|
|
(line i)
|
|
(let
|
|
((n (md/-hashes line 0)))
|
|
(mk-heading
|
|
(md/-id i)
|
|
n
|
|
(substring line (+ n 1) (string-length line))))))
|
|
|
|
;; ── list detection ──
|
|
(define
|
|
ct-digit?
|
|
(fn (ch) (ct-in? ch (list "0" "1" "2" "3" "4" "5" "6" "7" "8" "9"))))
|
|
(define
|
|
md/-digits
|
|
(fn
|
|
(s n)
|
|
(if
|
|
(and
|
|
(< n (string-length s))
|
|
(ct-digit? (substring s n (+ n 1))))
|
|
(md/-digits s (+ n 1))
|
|
n)))
|
|
(define
|
|
md/-ol?
|
|
(fn
|
|
(line)
|
|
(let
|
|
((n (md/-digits line 0)))
|
|
(and
|
|
(> n 0)
|
|
(>= (string-length line) (+ n 2))
|
|
(= (substring line n (+ n 2)) ". ")))))
|
|
(define
|
|
md/-drop-ol
|
|
(fn
|
|
(line)
|
|
(let
|
|
((n (md/-digits line 0)))
|
|
(substring line (+ n 2) (string-length line)))))
|
|
(define
|
|
md/-ul?
|
|
(fn
|
|
(line)
|
|
(if (ct-starts-with? line "- ") true (ct-starts-with? line "* "))))
|
|
(define
|
|
md/-drop-ul
|
|
(fn (line) (substring line 2 (string-length line))))
|
|
|
|
;; ── table detection ──
|
|
(define md/-pipe-row? (fn (line) (ct-starts-with? (trim line) "|")))
|
|
(define md/-sep-char? (fn (ch) (ct-in? ch (list "-" ":" "|" " "))))
|
|
(define
|
|
md/-all-sep?
|
|
(fn
|
|
(s i)
|
|
(if
|
|
(>= i (string-length s))
|
|
true
|
|
(if
|
|
(md/-sep-char? (substring s i (+ i 1)))
|
|
(md/-all-sep? s (+ i 1))
|
|
false))))
|
|
(define
|
|
md/-has-dash?
|
|
(fn
|
|
(s i)
|
|
(if
|
|
(>= i (string-length s))
|
|
false
|
|
(if
|
|
(= (substring s i (+ i 1)) "-")
|
|
true
|
|
(md/-has-dash? s (+ i 1))))))
|
|
(define
|
|
md/-sep-row?
|
|
(fn
|
|
(line)
|
|
(and
|
|
(md/-pipe-row? line)
|
|
(md/-all-sep? (trim line) 0)
|
|
(md/-has-dash? line 0))))
|
|
(define
|
|
md/-table-start?
|
|
(fn
|
|
(lines)
|
|
(and
|
|
(md/-pipe-row? (first lines))
|
|
(> (len lines) 1)
|
|
(md/-sep-row? (nth lines 1)))))
|
|
(define
|
|
md/-strip-pipes
|
|
(fn
|
|
(s0)
|
|
(let
|
|
((s (trim s0)))
|
|
(let
|
|
((a (if (ct-starts-with? s "|") (substring s 1 (string-length s)) s)))
|
|
(if
|
|
(and
|
|
(> (string-length a) 0)
|
|
(=
|
|
(substring
|
|
a
|
|
(- (string-length a) 1)
|
|
(string-length a))
|
|
"|"))
|
|
(substring a 0 (- (string-length a) 1))
|
|
a)))))
|
|
(define
|
|
md/-cells
|
|
(fn (line) (map (fn (c) (trim c)) (split (md/-strip-pipes line) "|"))))
|
|
|
|
(define
|
|
md/-plain?
|
|
(fn
|
|
(line)
|
|
(if
|
|
(md/-blank? line)
|
|
false
|
|
(if
|
|
(ct-starts-with? line "```")
|
|
false
|
|
(if
|
|
(md/-heading? line)
|
|
false
|
|
(if
|
|
(ct-starts-with? line "> ")
|
|
false
|
|
(if
|
|
(md/-hr? line)
|
|
false
|
|
(if (md/-ul? line) false (if (md/-ol? line) false true)))))))))
|
|
|
|
;; ── multi-line collectors ──
|
|
(define
|
|
md/-code
|
|
(fn
|
|
(lines i acc)
|
|
(md/-code-collect
|
|
(rest lines)
|
|
(md/-drop (first lines) "```")
|
|
(list)
|
|
i
|
|
acc)))
|
|
(define
|
|
md/-code-collect
|
|
(fn
|
|
(lines lang body i acc)
|
|
(cond
|
|
((= (len lines) 0)
|
|
(md/-walk
|
|
lines
|
|
(+ i 1)
|
|
(cons (mk-code (md/-id i) lang (md/-join-nl (reverse body))) acc)))
|
|
((= (first lines) "```")
|
|
(md/-walk
|
|
(rest lines)
|
|
(+ i 1)
|
|
(cons (mk-code (md/-id i) lang (md/-join-nl (reverse body))) acc)))
|
|
(else
|
|
(md/-code-collect (rest lines) lang (cons (first lines) body) i acc)))))
|
|
|
|
(define
|
|
md/-table-body
|
|
(fn
|
|
(lines headers rows i acc)
|
|
(if
|
|
(= (len lines) 0)
|
|
(md/-walk
|
|
lines
|
|
(+ i 1)
|
|
(cons (mk-table (md/-id i) headers (reverse rows)) acc))
|
|
(let
|
|
((line (first lines)))
|
|
(if
|
|
(md/-pipe-row? line)
|
|
(md/-table-body
|
|
(rest lines)
|
|
headers
|
|
(cons (md/-cells line) rows)
|
|
i
|
|
acc)
|
|
(md/-walk
|
|
lines
|
|
(+ i 1)
|
|
(cons (mk-table (md/-id i) headers (reverse rows)) acc)))))))
|
|
(define
|
|
md/-table
|
|
(fn
|
|
(lines i acc)
|
|
(md/-table-body
|
|
(rest (rest lines))
|
|
(md/-cells (first lines))
|
|
(list)
|
|
i
|
|
acc)))
|
|
|
|
(define
|
|
md/-list-collect
|
|
(fn
|
|
(lines items i acc ordered)
|
|
(if
|
|
(= (len lines) 0)
|
|
(md/-walk
|
|
lines
|
|
(+ i 1)
|
|
(cons (mk-list (md/-id i) ordered (reverse items)) acc))
|
|
(let
|
|
((line (first lines)))
|
|
(cond
|
|
(ordered
|
|
(if
|
|
(md/-ol? line)
|
|
(md/-list-collect
|
|
(rest lines)
|
|
(cons (md/-drop-ol line) items)
|
|
i
|
|
acc
|
|
ordered)
|
|
(md/-walk
|
|
lines
|
|
(+ i 1)
|
|
(cons (mk-list (md/-id i) ordered (reverse items)) acc))))
|
|
(else
|
|
(if
|
|
(md/-ul? line)
|
|
(md/-list-collect
|
|
(rest lines)
|
|
(cons (md/-drop-ul line) items)
|
|
i
|
|
acc
|
|
ordered)
|
|
(md/-walk
|
|
lines
|
|
(+ i 1)
|
|
(cons (mk-list (md/-id i) ordered (reverse items)) acc)))))))))
|
|
|
|
(define
|
|
md/-para-collect
|
|
(fn
|
|
(lines parts i acc)
|
|
(if
|
|
(= (len lines) 0)
|
|
(md/-walk
|
|
lines
|
|
(+ i 1)
|
|
(cons (mk-text (md/-id i) (md/-join-sp (reverse parts))) acc))
|
|
(let
|
|
((line (first lines)))
|
|
(if
|
|
(md/-plain? line)
|
|
(md/-para-collect (rest lines) (cons line parts) i acc)
|
|
(md/-walk
|
|
lines
|
|
(+ i 1)
|
|
(cons (mk-text (md/-id i) (md/-join-sp (reverse parts))) acc)))))))
|
|
|
|
;; ── main walk ──
|
|
(define
|
|
md/-walk
|
|
(fn
|
|
(lines i acc)
|
|
(if
|
|
(= (len lines) 0)
|
|
(reverse acc)
|
|
(let
|
|
((line (first lines)))
|
|
(cond
|
|
((md/-blank? line) (md/-walk (rest lines) i acc))
|
|
((ct-starts-with? line "```") (md/-code lines i acc))
|
|
((md/-heading? line)
|
|
(md/-walk
|
|
(rest lines)
|
|
(+ i 1)
|
|
(cons (md/-heading-block line i) acc)))
|
|
((ct-starts-with? line "> ")
|
|
(md/-walk
|
|
(rest lines)
|
|
(+ i 1)
|
|
(cons (mk-quote (md/-id i) "" (md/-drop line "> ")) acc)))
|
|
((md/-hr? line)
|
|
(md/-walk
|
|
(rest lines)
|
|
(+ i 1)
|
|
(cons (mk-divider (md/-id i)) acc)))
|
|
((md/-table-start? lines) (md/-table lines i acc))
|
|
((md/-ul? line) (md/-list-collect lines (list) i acc false))
|
|
((md/-ol? line) (md/-list-collect lines (list) i acc true))
|
|
(else (md/-para-collect lines (list) i acc)))))))
|
|
|
|
(define
|
|
md/parse
|
|
(fn (text) (md/-walk (split text (str "\n")) 0 (list))))
|
|
|
|
;; ── frontmatter (leading --- key: value --- block) ──
|
|
(define
|
|
md/-frontmatter?
|
|
(fn (lines) (and (> (len lines) 0) (= (first lines) "---"))))
|
|
(define
|
|
md/-fm-end
|
|
(fn
|
|
(lines i)
|
|
(cond
|
|
((>= i (len lines)) -1)
|
|
((= (nth lines i) "---") i)
|
|
(else (md/-fm-end lines (+ i 1))))))
|
|
(define
|
|
md/-fm-add
|
|
(fn
|
|
(acc line)
|
|
(let
|
|
((parts (split line ":")))
|
|
(if
|
|
(< (len parts) 2)
|
|
acc
|
|
(let
|
|
((key (trim (first parts)))
|
|
(val (trim (md/-join-with ":" (rest parts)))))
|
|
(cond
|
|
((= key "title") (assoc acc :title val))
|
|
((= key "slug") (assoc acc :slug val))
|
|
((= key "tags")
|
|
(assoc acc :tags (map (fn (t) (trim t)) (split val ","))))
|
|
(else acc)))))))
|
|
(define
|
|
md/-fm-pairs
|
|
(fn
|
|
(lines start end acc)
|
|
(if
|
|
(>= start end)
|
|
acc
|
|
(md/-fm-pairs
|
|
lines
|
|
(+ start 1)
|
|
end
|
|
(md/-fm-add acc (nth lines start))))))
|
|
|
|
;; ── adapter ──
|
|
(define
|
|
md/import
|
|
(fn
|
|
(text doc-id)
|
|
(let
|
|
((lines (split text (str "\n"))))
|
|
(if
|
|
(md/-frontmatter? lines)
|
|
(let
|
|
((end (md/-fm-end lines 1)))
|
|
(if
|
|
(= end -1)
|
|
(doc-new doc-id (md/-walk lines 0 (list)))
|
|
(doc-with-meta
|
|
(doc-new
|
|
doc-id
|
|
(md/-walk
|
|
(md/-drop-n lines (+ end 1))
|
|
0
|
|
(list)))
|
|
(md/-fm-pairs lines 1 end {}))))
|
|
(doc-new doc-id (md/-walk lines 0 (list)))))))
|
|
|
|
(define content/from-markdown md/import)
|
|
(define markdown-adapter {:export (fn (doc) (asMarkdown doc)) :import md/import})
|