From 0a2f1a61d1a9585e82dc0c2d815f678997cd716c Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 28 Jun 2026 18:52:56 +0000 Subject: [PATCH] =?UTF-8?q?host:=20typed=20relations=20=E2=80=94=20Phase?= =?UTF-8?q?=206=20(schema=20validation)=20+=20post-page=20perf=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 6 — gradual schema validation made real: - host/blog-type-schemas now carries a declarative schema (a list of {:block :msg} required-element rules); "article" requires an h1. - host/blog--all-tags / --schema-issues / host/blog-type-issues walk the parsed content and report each missing required block; host/blog-type-valid? = no issues. A type with no schema imposes nothing (gradual). - seed an "article" type-post (article subtype-of type). edit-submit now lists the specific schema issues on a 400 ("an article needs a heading"), so a post that is-a article must satisfy it on save. Post-page performance (the unresponsiveness): a post page was ~1s even with no relations and no load — NOT CPU (render-page ~2ms, in-memory handler ~5ms) but the DURABLE read path: host/blog--relation-blocks called host/blog-out/in, each re-scanning the whole KV (host/blog-slugs + an all-edges scan), so a page did ~7 kv-keys performs deep in the call stack. Each durable perform routes through cek_run_with_io and is costly there. Fixes: - host/blog-out/in read DIRECT edges from the durable edge store (string scan), not lib/relations (whose queries re-saturate the Datalog ruleset, ~seconds). - host/blog--relation-blocks reads the KV key list ONCE and derives both the post set and the edges in memory (host/blog--edges-for / --recs-slugs), one kv-keys plus a host/blog-get per linked post. Post pages: ~1s -> ~0.02s (46x); live 11-135s -> ~0.15s. lib/relations stays for TRANSITIVE queries only. - conformance timeout 300 -> 600s: the relations-heavy blog suite is CPU-bound under shared-box contention and was tripping a false truncation at 300. 271/271 (blog 100). Verified live: post pages fast, Tags/Related/Tagged-with-this render, schema rejection works. Co-Authored-By: Claude Opus 4.8 --- lib/host/blog.sx | 203 +++++++++++++++++++++++++++++----------- lib/host/conformance.sh | 5 +- lib/host/tests/blog.sx | 31 ++++++ 3 files changed, 183 insertions(+), 56 deletions(-) diff --git a/lib/host/blog.sx b/lib/host/blog.sx index 6a13a841..222e54f7 100644 --- a/lib/host/blog.sx +++ b/lib/host/blog.sx @@ -175,13 +175,47 @@ (map (fn (n) (substr (symbol->string n) 5)) (filter (fn (n) (starts-with? (symbol->string n) "blog:")) nodes)))))) -;; outgoing targets / incoming sources of `slug` under `kind`, as slug lists. +;; DIRECT edges come from the durable edge store, NOT lib/relations: each relations +;; query re-runs the (CEK-interpreted) ruleset — ~seconds even on a tiny graph — +;; whereas the edge:|| KV rows are a cheap string scan. lib/relations +;; is reserved for TRANSITIVE queries (descendants/ancestors). The two are always +;; in sync: host/blog-relate! writes both, and a plain blog edge has no derived +;; effective edges, so KV == relations/children for direct lookups. +(define host/blog--parse-edge-key + (fn (k) + (if (starts-with? k "edge:") + (let ((body (substr k 5))) + (let ((p1 (index-of body "|"))) + (if (< p1 0) nil + (let ((rest (substr body (+ p1 1)))) + (let ((p2 (index-of rest "|"))) + (if (< p2 0) nil + {:src (substr body 0 p1) + :kind (substr rest 0 p2) + :dst (substr rest (+ p2 1))})))))) + nil))) +(define host/blog--all-edges + (fn () + (filter (fn (e) (not (nil? e))) + (map host/blog--parse-edge-key (persist/backend-kv-keys host/blog-store))))) + +;; outgoing targets / incoming sources of `slug` under `kind`, as existing slugs. (define host/blog-out (fn (slug kind) - (host/blog--edge-slugs (relations/children (host/blog--node slug) (string->symbol kind))))) + (let ((existing (host/blog-slugs))) + (filter (fn (s) (contains? existing s)) + (reduce (fn (acc e) + (if (and (= (get e :src) slug) (= (get e :kind) kind)) + (concat acc (list (get e :dst))) acc)) + (list) (host/blog--all-edges)))))) (define host/blog-in (fn (slug kind) - (host/blog--edge-slugs (relations/parents (host/blog--node slug) (string->symbol kind))))) + (let ((existing (host/blog-slugs))) + (filter (fn (s) (contains? existing s)) + (reduce (fn (acc e) + (if (and (= (get e :dst) slug) (= (get e :kind) kind)) + (concat acc (list (get e :src))) acc)) + (list) (host/blog--all-edges)))))) ;; back-compat: "related posts" is just the symmetric "related" kind. (define host/blog-related (fn (slug) (host/blog-out slug "related"))) @@ -229,19 +263,45 @@ (define host/blog-tags (fn (slug) (host/blog-out slug "tagged"))) ;; a post's tags (define host/blog-tagged-with (fn (tag) (host/blog-in tag "tagged"))) ;; posts with a tag -;; ── gradual validation seam ───────────────────────────────────────── -;; A type-post optionally carries a schema: a predicate over content. The map is -;; empty for now — validation is gradual, types accrue schemas later — but the -;; hook lives here so enforcement is a one-line addition, never a retrofit. A post -;; is type-valid when every schema implied by its types accepts the content; with -;; no schemas this is vacuously true, so it costs nothing until a type opts in. -(define host/blog-type-schemas {}) +;; ── gradual validation: declarative type schemas ─────────────────── +;; A type may carry a SCHEMA: a list of rules {:block :msg }, each +;; requiring the content to contain (anywhere) an element of that tag. A post is +;; checked against the schema of every type it is-a; a type with no schema imposes +;; nothing (gradual). Schemas are declarative data (not opaque predicates) so they +;; yield a specific, human error — and could later be stored ON the type-post. +(define host/blog-type-schemas + {:article (list {:block "h1" :msg "an article needs a heading (h1)"})}) (define host/blog-schema-of (fn (type-slug) (get host/blog-type-schemas type-slug))) -(define host/blog-type-valid? + +;; every element tag in a parsed content tree, recursively (the heads of nested +;; lists) — so "requires h1" matches an h1 even inside an article/section wrapper. +(define host/blog--all-tags + (fn (tree) + (if (and (= (type-of tree) "list") (> (len tree) 0)) + (concat (list (str (first tree))) + (reduce (fn (acc c) (concat acc (host/blog--all-tags c))) (list) (rest tree))) + (list)))) + +;; the :msg of each required :block a schema asks for but the content lacks. +(define host/blog--schema-issues + (fn (schema content) + (let ((tags (host/blog--all-tags (parse-safe content)))) + (reduce + (fn (acc rule) + (if (contains? tags (get rule :block)) acc (concat acc (list (get rule :msg))))) + (list) schema)))) + +;; all schema issues for a post = the union over every type it is-a that carries a +;; schema. Empty = valid; vacuous (and cheap) when no type has a schema. +(define host/blog-type-issues (fn (slug content) - (every? - (fn (t) (let ((s (host/blog-schema-of t))) (or (nil? s) (s content)))) - (host/blog-types-of slug)))) + (reduce + (fn (acc t) + (let ((s (host/blog-schema-of t))) + (if s (concat acc (host/blog--schema-issues s content)) acc))) + (list) (host/blog-types-of slug)))) +(define host/blog-type-valid? + (fn (slug content) (= (len (host/blog-type-issues slug content)) 0))) ;; Seed the root type-posts: "type" (the root) and "tag" (a kind of type). Types ;; ARE posts, so these are real posts that document themselves; tag subtype-of @@ -256,7 +316,13 @@ (host/blog-seed! "tag" "Tag" "(article (h1 \"Tag\") (p \"A tag is a kind of type (tag subtype-of type), so anything that is-a tag is also a type. A post is tagged with a tag; a tag post documents the tag and lists what is tagged with it.\"))" "published") - (host/blog-relate! "tag" "type" "subtype-of")))) + (host/blog-relate! "tag" "type" "subtype-of") + ;; "article" — a type WITH a schema (requires a heading). Posts that is-a + ;; article are validated against it on save (gradual typing in action). + (host/blog-seed! "article" "Article" + "(article (h1 \"Article\") (p \"A kind of post that must have a heading. A post that is-a article is checked against this type's schema on save — gradual typing: declaring the type adds the requirement, and the next edit must satisfy it.\"))" + "published") + (host/blog-relate! "article" "type" "subtype-of")))) ;; ── relate picker (filterable, paginated candidate list) ──────────── ;; Candidates to relate `slug` to: every post except itself and ones already @@ -372,31 +438,53 @@ (map (fn (n) (substr (symbol->string n) 5)) (filter (fn (n) (starts-with? (symbol->string n) "blog:")) nodes)))))) -;; ALL of a post's relation blocks, generated by ITERATING the registry: each -;; kind contributes its outgoing block (label) and, if it has an inverse, its -;; incoming block (inverse-label). Empty blocks are dropped. So adding a kind to -;; the registry makes it render automatically — no handler edit. One kv-keys read -;; up front; the relation lookups are in-memory. Returns a wrapper div, or "". +;; The relation blocks shown on a POST page — a CURATED, fixed set: Related (out), +;; Tags (out), Tagged-with-this (in). PERFORMANCE: read the KV key list ONCE and +;; derive both the post set and the edges from it in memory, instead of letting +;; each host/blog-out/in re-scan the store. Every durable read is a perform routed +;; through cek_run_with_io (costly deep in the call stack), so the post page must +;; minimise them — this does ONE kv-keys plus a host/blog-get per linked post. +(define host/blog--post-relation-specs + (list {:kind "related" :dir "out" :label "Related posts"} + {:kind "tagged" :dir "out" :label "Tags"} + {:kind "tagged" :dir "in" :label "Tagged with this"})) +;; in-memory: the slug list (out: dst, in: src) for `slug` under `kind` from +;; pre-parsed edges — no perform. +(define host/blog--edges-for + (fn (edges slug kind dir) + (reduce + (fn (acc e) + (if (= (get e :kind) kind) + (if (= dir "out") + (if (= (get e :src) slug) (concat acc (list (get e :dst))) acc) + (if (= (get e :dst) slug) (concat acc (list (get e :src))) acc)) + acc)) + (list) edges))) +;; slug list -> {:slug :title} records (existence-filtered), one host/blog-get each. +(define host/blog--recs-slugs + (fn (existing slugs) + (map (fn (s) {:slug s :title (get (host/blog-get s) :title)}) + (filter (fn (s) (contains? existing s)) slugs)))) (define host/blog--relation-blocks (fn (slug) - (let ((existing (host/blog-slugs)) - (node (host/blog--node slug))) - (let ((blocks - (reduce - (fn (acc spec) - (let ((k (string->symbol (get spec :kind)))) - (let ((out-b (host/blog--edges-block - (host/blog--recs existing (relations/children node k)) - (get spec :label))) - (in-b (if (get spec :inverse-label) - (host/blog--edges-block - (host/blog--recs existing (relations/parents node k)) - (get spec :inverse-label)) - ""))) - (concat acc (filter (fn (b) (not (= b ""))) (list out-b in-b)))))) - (list) - host/blog-rel-kinds))) - (if (> (len blocks) 0) (cons (quote div) blocks) ""))))) + (let ((keys (persist/backend-kv-keys host/blog-store))) ;; ONE durable read + (let ((existing (reduce (fn (acc k) + (if (starts-with? k "blog:") + (concat acc (list (substr k 5))) acc)) + (list) keys)) + (edges (filter (fn (e) (not (nil? e))) + (map host/blog--parse-edge-key keys)))) + (let ((blocks + (reduce + (fn (acc spec) + (let ((b (host/blog--edges-block + (host/blog--recs-slugs existing + (host/blog--edges-for edges slug (get spec :kind) (get spec :dir))) + (get spec :label)))) + (if (= b "") acc (concat acc (list b))))) + (list) + host/blog--post-relation-specs))) + (if (> (len blocks) 0) (cons (quote div) blocks) "")))))) ;; the relation section for the post page: the blocks, or — when empty and the ;; viewer is logged in — a subtle "add some" hint; nothing for anonymous viewers. @@ -777,26 +865,31 @@ ;; create paths (unparseable body -> 400, post left intact). Slug is preserved. (define host/blog-edit-submit (fn (req) - (let ((slug (dream-param req "slug"))) - (let ((r (host/blog-get slug))) - (if (nil? r) - (dream-html-status 404 - (host/blog--page "Not found" - (quasiquote (div (h1 "404") (p (unquote (str "No post: " slug))))))) - (let ((title (or (dream-form-field req "title") (get r :title))) - (sx-content (or (dream-form-field req "sx_content") "")) - (status (or (dream-form-field req "status") (get r :status)))) - ;; parse-valid AND type-valid (the post's types' schemas accept the - ;; content — vacuous until a type opts into a schema). - (if (and (host/blog-content-ok? sx-content) - (host/blog-type-valid? slug sx-content)) + (let ((slug (dream-param req "slug")) (r (host/blog-get (dream-param req "slug")))) + (if (nil? r) + (dream-html-status 404 + (host/blog--page "Not found" + (quasiquote (div (h1 "404") (p (unquote (str "No post: " slug))))))) + (let ((title (or (dream-form-field req "title") (get r :title))) + (sx-content (or (dream-form-field req "sx_content") "")) + (status (or (dream-form-field req "status") (get r :status)))) + ;; collect issues up front (perform): unparseable markup, then each + ;; schema requirement the post's types impose. Empty = save. + (let ((issues (if (host/blog-content-ok? sx-content) + (host/blog-type-issues slug sx-content) + (list "Post body is not valid SX markup.")))) + (if (= (len issues) 0) (begin (host/blog-put! slug title sx-content status) (dream-redirect (str "/" slug "/"))) - (dream-html-status 400 - (host/blog--page "Error" - (quasiquote (div (h1 "Error") (p "Post body is not valid SX markup.") - (p (a :href (unquote (str "/" slug "/edit")) "Back"))))))))))))) + (let ((issue-items (map (fn (i) (quasiquote (li (unquote i)))) issues))) + (dream-html-status 400 + (host/blog--page "Cannot save" + (quasiquote + (div (h1 "Cannot save") + (p "This post can't be saved yet:") + (unquote (list (quote ul) issue-items)) + (p (a :href (unquote (str "/" slug "/edit")) "Back")))))))))))))) ;; ── routes ────────────────────────────────────────────────────────── ;; Public reads + the create form. /, /posts, /new BEFORE /:slug (catch-all). diff --git a/lib/host/conformance.sh b/lib/host/conformance.sh index 0a4ad863..422cf9eb 100755 --- a/lib/host/conformance.sh +++ b/lib/host/conformance.sh @@ -113,7 +113,10 @@ emit_eval () { echo "(epoch $EPOCH)"; echo "(eval \"$1\")"; EPOCH=$((EPOCH+1)); done } > "$TMPFILE" -OUTPUT=$(timeout 300 "$SX_SERVER" < "$TMPFILE" 2>&1 || true) +# 600s: the blog suite drives the relations graph hard (every is-a/types-of/ +# instances-of query re-saturates the Datalog db), so it's CPU-bound and slower +# under shared-box contention. 300s was tripping a false truncation. +OUTPUT=$(timeout 600 "$SX_SERVER" < "$TMPFILE" 2>&1 || true) # Fail LOUD on any load/eval error. A test file that errors mid-load silently # truncates its suite — the runner returns only the tests that ran before the diff --git a/lib/host/tests/blog.sx b/lib/host/tests/blog.sx index 03463d7c..2434ec16 100644 --- a/lib/host/tests/blog.sx +++ b/lib/host/tests/blog.sx @@ -411,6 +411,37 @@ (host-bl-test "/tags is 200 (not shadowed by /:slug)" (dream-status (host-bl-app (host-bl-req "/tags"))) 200) +;; -- Phase 6: gradual schema validation -- +(host/blog-seed-types!) ;; ensures the "article" type + its schema (requires h1) +(host-bl-test "all-tags finds nested element tags" + (let ((tags (host/blog--all-tags (parse-safe "(article (h1 \"T\") (p \"x\"))")))) + (list (contains? tags "h1") (contains? tags "p") (contains? tags "section"))) + (list true true false)) +(host-bl-test "schema-issues: missing required block -> 1 issue; present -> 0" + (let ((sch (host/blog-schema-of "article"))) + (list (len (host/blog--schema-issues sch "(p \"no heading\")")) + (len (host/blog--schema-issues sch "(article (h1 \"yes\"))")))) + (list 1 0)) +(host-bl-test "type-valid? enforces an is-a article's schema" + (begin + (host/blog-put! "art1" "Art 1" "(p \"x\")" "published") + (host/blog-relate! "art1" "article" "is-a") + (list (host/blog-type-valid? "art1" "(p \"no heading\")") + (host/blog-type-valid? "art1" "(article (h1 \"H\") (p \"x\"))"))) + (list false true)) +(host-bl-test "a post with no schema'd type is vacuously valid" + (host/blog-type-valid? "ppost" "(p \"anything\")") true) +(host-bl-test "edit-submit rejects content violating the type schema (not saved)" + (begin + (host-bl-wapp (host-bl-send "POST" "/art1/edit" "Bearer good" + "application/x-www-form-urlencoded" "sx_content=(p+%22still+no+heading%22)")) + (contains? (dream-resp-body (host-bl-wapp (host-bl-req "/art1/"))) "still no heading")) + false) +(host-bl-test "edit-submit accepts content satisfying the schema -> 303" + (dream-status (host-bl-wapp (host-bl-send "POST" "/art1/edit" "Bearer good" + "application/x-www-form-urlencoded" "sx_content=(article+(h1+%22Heading%22)+(p+%22body%22))"))) + 303) + ;; -- experimental unguarded create-only route (POST /new, no auth) -- (define host-bl-oapp (host/make-app (list host/blog-open-create-routes host/blog-routes))) (host/blog-use-store! (persist/open))