From 295864786dad7dafef3a7b5d7b49f2ba5ef9237d Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 7 Jun 2026 01:33:50 +0000 Subject: [PATCH] content: Markdown import adapter (md-import) + 24 tests (318/318) Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/content/conformance.sh | 3 +- lib/content/md-import.sx | 270 +++++++++++++++++++++++++++++++++ lib/content/scoreboard.json | 5 +- lib/content/scoreboard.md | 3 +- lib/content/tests/md-import.sx | 120 +++++++++++++++ plans/content-on-sx.md | 10 +- 6 files changed, 406 insertions(+), 5 deletions(-) create mode 100644 lib/content/md-import.sx create mode 100644 lib/content/tests/md-import.sx diff --git a/lib/content/conformance.sh b/lib/content/conformance.sh index b74c3590..8084c116 100755 --- a/lib/content/conformance.sh +++ b/lib/content/conformance.sh @@ -15,7 +15,7 @@ if [ ! -x "$SX_SERVER" ]; then fi fi -SUITES=(block doc render api markdown validate store crdt crdt-store sync fed) +SUITES=(block doc render api markdown validate store crdt crdt-store sync md-import fed) OUT_JSON="lib/content/scoreboard.json" OUT_MD="lib/content/scoreboard.md" @@ -48,6 +48,7 @@ run_suite() { (load "lib/content/crdt.sx") (load "lib/content/crdt-store.sx") (load "lib/content/sync.sx") +(load "lib/content/md-import.sx") (load "lib/content/fed.sx") (epoch 2) (eval "(define content-test-pass 0)") diff --git a/lib/content/md-import.sx b/lib/content/md-import.sx new file mode 100644 index 00000000..5abd885c --- /dev/null +++ b/lib/content/md-import.sx @@ -0,0 +1,270 @@ +;; content-on-sx — Markdown import adapter (markdown text -> block document). +;; +;; A line-based parser, the inverse of markdown.sx's asMarkdown. Confined to the +;; adapter boundary: the core knows nothing about Markdown. Handles ATX headings +;; (#..######), fenced code (```lang), blockquotes (> ), unordered (- / * ) and +;; ordered (1. ) lists, thematic breaks (--- / ***), and paragraphs (consecutive +;; plain lines joined with a space). Block ids are assigned sequentially b0,b1… +;; +;; Requires (loaded by harness): block.sx, doc.sx (and markdown.sx for the +;; adapter's export side). + +(define md/-id (fn (i) (str "b" i))) +(define md/-blank? (fn (s) (= s ""))) +(define md/-hr? (fn (s) (if (= s "---") true (= s "***")))) + +(define + ct-in? + (fn + (x xs) + (cond + ((= (len xs) 0) false) + ((= (first xs) x) true) + (else (ct-in? x (rest xs)))))) + +(define + ct-starts-with? + (fn + (s prefix) + (and + (>= (string-length s) (string-length prefix)) + (= (substring s 0 (string-length prefix)) prefix)))) + +(define + md/-drop + (fn (s prefix) (substring s (string-length prefix) (string-length s)))) + +(define + md/-join-with + (fn + (sep parts) + (cond + ((= (len parts) 0) "") + ((= (len parts) 1) (first parts)) + (else (str (first parts) sep (md/-join-with sep (rest parts))))))) +(define md/-join-sp (fn (parts) (md/-join-with " " parts))) +(define md/-join-nl (fn (parts) (md/-join-with (str "\n") parts))) + +;; ── heading detection (leading #s then a space) ── +(define + md/-hashes + (fn + (s n) + (if + (and + (< n (string-length s)) + (= (substring s n (+ n 1)) "#")) + (md/-hashes s (+ n 1)) + n))) +(define + md/-heading? + (fn + (line) + (let + ((n (md/-hashes line 0))) + (and + (> n 0) + (<= n 6) + (> (string-length line) n) + (= (substring line n (+ n 1)) " "))))) +(define + md/-heading-block + (fn + (line i) + (let + ((n (md/-hashes line 0))) + (mk-heading + (md/-id i) + n + (substring line (+ n 1) (string-length line)))))) + +;; ── list detection ── +(define + ct-digit? + (fn (ch) (ct-in? ch (list "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")))) +(define + md/-digits + (fn + (s n) + (if + (and + (< n (string-length s)) + (ct-digit? (substring s n (+ n 1)))) + (md/-digits s (+ n 1)) + n))) +(define + md/-ol? + (fn + (line) + (let + ((n (md/-digits line 0))) + (and + (> n 0) + (>= (string-length line) (+ n 2)) + (= (substring line n (+ n 2)) ". "))))) +(define + md/-drop-ol + (fn + (line) + (let + ((n (md/-digits line 0))) + (substring line (+ n 2) (string-length line))))) +(define + md/-ul? + (fn + (line) + (if (ct-starts-with? line "- ") true (ct-starts-with? line "* ")))) +(define + md/-drop-ul + (fn (line) (substring line 2 (string-length line)))) + +(define + md/-plain? + (fn + (line) + (if + (md/-blank? line) + false + (if + (ct-starts-with? line "```") + false + (if + (md/-heading? line) + false + (if + (ct-starts-with? line "> ") + false + (if + (md/-hr? line) + false + (if (md/-ul? line) false (if (md/-ol? line) false true))))))))) + +;; ── multi-line collectors ── +(define + md/-code + (fn + (lines i acc) + (md/-code-collect + (rest lines) + (md/-drop (first lines) "```") + (list) + i + acc))) +(define + md/-code-collect + (fn + (lines lang body i acc) + (cond + ((= (len lines) 0) + (md/-walk + lines + (+ i 1) + (cons (mk-code (md/-id i) lang (md/-join-nl (reverse body))) acc))) + ((= (first lines) "```") + (md/-walk + (rest lines) + (+ i 1) + (cons (mk-code (md/-id i) lang (md/-join-nl (reverse body))) acc))) + (else + (md/-code-collect (rest lines) lang (cons (first lines) body) i acc))))) + +(define + md/-list-collect + (fn + (lines items i acc ordered) + (if + (= (len lines) 0) + (md/-walk + lines + (+ i 1) + (cons (mk-list (md/-id i) ordered (reverse items)) acc)) + (let + ((line (first lines))) + (cond + (ordered + (if + (md/-ol? line) + (md/-list-collect + (rest lines) + (cons (md/-drop-ol line) items) + i + acc + ordered) + (md/-walk + lines + (+ i 1) + (cons (mk-list (md/-id i) ordered (reverse items)) acc)))) + (else + (if + (md/-ul? line) + (md/-list-collect + (rest lines) + (cons (md/-drop-ul line) items) + i + acc + ordered) + (md/-walk + lines + (+ i 1) + (cons (mk-list (md/-id i) ordered (reverse items)) acc))))))))) + +(define + md/-para-collect + (fn + (lines parts i acc) + (if + (= (len lines) 0) + (md/-walk + lines + (+ i 1) + (cons (mk-text (md/-id i) (md/-join-sp (reverse parts))) acc)) + (let + ((line (first lines))) + (if + (md/-plain? line) + (md/-para-collect (rest lines) (cons line parts) i acc) + (md/-walk + lines + (+ i 1) + (cons (mk-text (md/-id i) (md/-join-sp (reverse parts))) acc))))))) + +;; ── main walk ── +(define + md/-walk + (fn + (lines i acc) + (if + (= (len lines) 0) + (reverse acc) + (let + ((line (first lines))) + (cond + ((md/-blank? line) (md/-walk (rest lines) i acc)) + ((ct-starts-with? line "```") (md/-code lines i acc)) + ((md/-heading? line) + (md/-walk + (rest lines) + (+ i 1) + (cons (md/-heading-block line i) acc))) + ((ct-starts-with? line "> ") + (md/-walk + (rest lines) + (+ i 1) + (cons (mk-quote (md/-id i) "" (md/-drop line "> ")) acc))) + ((md/-hr? line) + (md/-walk + (rest lines) + (+ i 1) + (cons (mk-divider (md/-id i)) acc))) + ((md/-ul? line) (md/-list-collect lines (list) i acc false)) + ((md/-ol? line) (md/-list-collect lines (list) i acc true)) + (else (md/-para-collect lines (list) i acc))))))) + +(define + md/parse + (fn (text) (md/-walk (split text (str "\n")) 0 (list)))) + +;; ── adapter ── +(define md/import (fn (text doc-id) (doc-new doc-id (md/parse text)))) +(define content/from-markdown md/import) +(define markdown-adapter {:export (fn (doc) (asMarkdown doc)) :import md/import}) diff --git a/lib/content/scoreboard.json b/lib/content/scoreboard.json index 6fce4919..27c95023 100644 --- a/lib/content/scoreboard.json +++ b/lib/content/scoreboard.json @@ -10,9 +10,10 @@ "crdt": {"pass": 34, "fail": 0}, "crdt-store": {"pass": 14, "fail": 0}, "sync": {"pass": 14, "fail": 0}, + "md-import": {"pass": 24, "fail": 0}, "fed": {"pass": 20, "fail": 0} }, - "total_pass": 294, + "total_pass": 318, "total_fail": 0, - "total": 294 + "total": 318 } diff --git a/lib/content/scoreboard.md b/lib/content/scoreboard.md index 2a3fcb5c..cdc9f735 100644 --- a/lib/content/scoreboard.md +++ b/lib/content/scoreboard.md @@ -14,5 +14,6 @@ _Generated by `lib/content/conformance.sh`_ | crdt | 34 | 0 | 34 | | crdt-store | 14 | 0 | 14 | | sync | 14 | 0 | 14 | +| md-import | 24 | 0 | 24 | | fed | 20 | 0 | 20 | -| **Total** | **294** | **0** | **294** | +| **Total** | **318** | **0** | **318** | diff --git a/lib/content/tests/md-import.sx b/lib/content/tests/md-import.sx new file mode 100644 index 00000000..24572d65 --- /dev/null +++ b/lib/content/tests/md-import.sx @@ -0,0 +1,120 @@ +;; Extension — Markdown import adapter (markdown text -> blocks), inverse of +;; asMarkdown. Round-trips canonical Markdown. + +(st-bootstrap-classes!) +(content/bootstrap!) +(content-bootstrap-markdown!) + +(define nl (str "\n")) + +;; ── headings ── +(define dh (md/import "# Title" "d")) +(content-test "heading import type" (doc-types dh) (list "heading")) +(content-test + "heading level" + (blk-send (doc-find dh "b0") "level") + 1) +(content-test + "heading text" + (str (blk-send (doc-find dh "b0") "text")) + "Title") +(content-test + "h3 import" + (blk-send (doc-find (md/import "### Deep" "d") "b0") "level") + 3) + +;; ── paragraph (consecutive lines join with space) ── +(content-test + "paragraph join" + (str + (blk-send + (doc-find (md/import (str "hello" nl "world") "d") "b0") + "text")) + "hello world") + +;; ── blockquote, divider ── +(content-test + "blockquote" + (str (blk-send (doc-find (md/import "> quoted" "d") "b0") "text")) + "quoted") +(content-test "divider" (doc-types (md/import "---" "d")) (list "divider")) + +;; ── unordered + ordered lists ── +(define dul (md/import (str "- a" nl "- b" nl "- c") "d")) +(content-test "ul type" (doc-types dul) (list "list")) +(content-test + "ul not ordered" + (blk-send (doc-find dul "b0") "ordered") + false) +(content-test + "ul items" + (blk-send (doc-find dul "b0") "items") + (list "a" "b" "c")) +(define dol (md/import (str "1. x" nl "2. y") "d")) +(content-test "ol ordered" (blk-send (doc-find dol "b0") "ordered") true) +(content-test + "ol items" + (blk-send (doc-find dol "b0") "items") + (list "x" "y")) + +;; ── fenced code ── +(define dc (md/import (str "```sx" nl "(+ 1 2)" nl "(* 3 4)" nl "```") "d")) +(content-test "code type" (doc-types dc) (list "code")) +(content-test + "code language" + (str (blk-send (doc-find dc "b0") "language")) + "sx") +(content-test + "code body" + (str (blk-send (doc-find dc "b0") "text")) + (str "(+ 1 2)" nl "(* 3 4)")) + +;; ── multiple blocks separated by blank lines ── +(define dm (md/import (str "# H" nl nl "para" nl nl "- a" nl "- b") "d")) +(content-test "multi types" (doc-types dm) (list "heading" "text" "list")) +(content-test "multi ids" (doc-ids dm) (list "b0" "b1" "b2")) + +;; ── empty / blank input ── +(content-test "empty input" (doc-ids (md/import "" "d")) (list)) +(content-test + "blank lines only" + (doc-ids (md/import (str nl nl) "d")) + (list)) + +;; ── round-trip: import . export == identity (canonical markdown) ── +(define + src + (str + "# Title" + nl + nl + "hello world" + nl + nl + "> quoted" + nl + nl + "- a" + nl + "- b" + nl + nl + "---")) +(content-test "round-trip markdown" (asMarkdown (md/import src "d")) src) +(content-test + "round-trip code" + (asMarkdown (md/import (str "```js" nl "x = 1" nl "```") "d")) + (str "```js" nl "x = 1" nl "```")) + +;; ── adapter form ── +(content-test + "adapter import" + (doc-types (content/import markdown-adapter "# Hi" "d")) + (list "heading")) +(content-test + "adapter export round-trip" + (content/export markdown-adapter (content/import markdown-adapter src "d")) + src) + +;; ── imported doc validates ── +(content-test "imported doc valid" (content/valid? (md/import src "d")) true) diff --git a/plans/content-on-sx.md b/plans/content-on-sx.md index bc103f1f..c9ba47d7 100644 --- a/plans/content-on-sx.md +++ b/plans/content-on-sx.md @@ -19,7 +19,7 @@ injected adapter, not core. ## Status (rolling) -`bash lib/content/conformance.sh` → **294/294** (Phases 1–4 COMPLETE + extensions: HTML/SX escaping, Markdown, durable CRDT replication, validation) +`bash lib/content/conformance.sh` → **318/318** (Phases 1–4 COMPLETE + extensions: HTML/SX escaping, Markdown render+import, durable CRDT replication, validation) ## Ground rules @@ -81,9 +81,17 @@ lib/content/api.sx ── (content/edit) (content/render) (content/history) ─ - [x] Markdown render mode (`asMarkdown:` / `content/render doc "md"`) - [x] durable CRDT replication (`crdt-store.sx`: ops on persist, replay + converge) - [x] document validation (`validate.sx`: ids, per-type fields, duplicate ids) +- [x] Markdown import adapter (`md-import.sx`: text → blocks, round-trips export) ## Progress log +- 2026-06-07 — Extension: Markdown import adapter (`md-import.sx`), inverse of + asMarkdown. Line-based parser: ATX headings, fenced code (```lang), blockquotes, + unordered/ordered lists (grouping consecutive items), thematic breaks, + paragraphs (consecutive plain lines joined with a space). Sequential ids + b0,b1…. `md/import` / `content/from-markdown` / `markdown-adapter` (import + + asMarkdown export). Round-trips canonical Markdown (import∘export == identity); + imported docs pass validation. 24 tests; suite 318/318. - 2026-06-07 — Extension: document validation (`validate.sx`). `content/validate` returns issue dicts `{:id :kind :detail}` (empty = valid); `content/valid?` and `content/issue-kinds` convenience. Checks block id (non-empty string),