From 498ec006fe76a2127c8b8dad97fc77a12e59547f Mon Sep 17 00:00:00 2001 From: giles Date: Tue, 30 Jun 2026 23:40:11 +0000 Subject: [PATCH] =?UTF-8?q?host:=20blog=20edge=20graph=20is=20KV-only=20?= =?UTF-8?q?=E2=80=94=20drop=20the=20per-write=20Datalog=20re-saturation=20?= =?UTF-8?q?(major=20perf)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A REAL production perf bug, surfaced while profiling slow conformance. host/blog--add-edge! mirrored every edge into lib/relations via relations/relate, which RE-SATURATES the whole CEK-interpreted Datalog ruleset on every single write — super-linear in the fact base (profiled: 1.1s → 3.5s → 6.1s per edge as the graph grows 10→20→30 facts; O(graph) per write, O(edges²) to build). This hit the LIVE SITE on every content op: importing a Ghost post (decompose! = ~4 edges/block), tagging, relating, is-a, the metamodel editor — all getting slower as the site grows. Since typing now reads direct KV edges (host/blog--subtype-closure et al.), NOTHING in the blog domain reads lib/relations anymore — the mirror was pure, very expensive dead weight. So edges are now KV-only: add/del-edge! just kv-put/kv-delete (~20ms FLAT, O(1)); reads already walk the edge:* rows directly. host/blog-load-edges! (which replayed every edge into lib/relations on boot — O(edges²)) is now a no-op. conj/disj operands were already KV-only, proving the whole graph can be. host/relations.sx (the relations DOMAIN service, its own type:id nodes) is separate and untouched. Result: blog-relate! 6.1s→20ms/call (and now FLAT, not growing); full blog suite ~23min→19s; all 11 host suites 353/355 in 36s (the 2 fails are the pre-existing relate-picker pair). Live writes drop from seconds to ~20ms. Pairs with the typing-reads-from-KV fix (prev commit). 
Co-Authored-By: Claude Opus 4.8 --- lib/host/blog.sx | 55 ++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 37 deletions(-) diff --git a/lib/host/blog.sx b/lib/host/blog.sx index b765e9c8..27f0dc77 100644 --- a/lib/host/blog.sx +++ b/lib/host/blog.sx @@ -187,24 +187,24 @@ (define host/blog--kind-symmetric? (fn (kind) (let ((s (host/blog--kind-spec kind))) (and s (get s :symmetric))))) -;; ── edges (parameterised by kind, DURABLE) ────────────────────────── -;; lib/relations holds the graph in memory (a Datalog cache that re-saturates per -;; query); it does NOT survive a restart. So the host owns the durable source of -;; truth: every physical edge is also a KV row "edge:<src>|<kind>|<dst>" in the -;; blog store, replayed into the in-memory graph on boot (host/blog-load-edges!). -;; '|' is a safe delimiter — slugs are [a-z0-9-], kinds are registry names. +;; ── edges (parameterised by kind, DURABLE, KV-only) ───────────────── +;; The blog graph is the durable KV: every edge is a row "edge:<src>|<kind>|<dst>" in the +;; blog store, and ALL reads walk those rows directly (host/blog--all-edges / -out / -in / +;; --subtype-closure). It is NOT mirrored into lib/relations: relations/relate re-saturates +;; the whole Datalog ruleset on EVERY write (super-linear in the fact base — profiled at +;; 1→3→6s per edge as the graph grows), and since typing now reads direct KV edges, nothing +;; in the blog domain reads lib/relations, so the mirror was pure (very expensive) dead +;; weight. KV-only edge writes are ~20ms flat. '|' is a safe delimiter — slugs are +;; [a-z0-9-], kinds are registry names. (host/relations.sx, the relations DOMAIN service, is +;; separate: its own "type:id" nodes in lib/relations, untouched by this.) (define host/blog--edge-key (fn (src kind dst) (str "edge:" src "|" kind "|" dst))) (define host/blog--add-edge! 
(fn (src dst kind) - (begin - (relations/relate (host/blog--node src) (host/blog--node dst) (string->symbol kind)) - (persist/backend-kv-put host/blog-store (host/blog--edge-key src kind dst) 1)))) + (persist/backend-kv-put host/blog-store (host/blog--edge-key src kind dst) 1))) (define host/blog--del-edge! (fn (src dst kind) - (begin - (relations/unrelate (host/blog--node src) (host/blog--node dst) (string->symbol kind)) - (persist/backend-kv-delete host/blog-store (host/blog--edge-key src kind dst))))) + (persist/backend-kv-delete host/blog-store (host/blog--edge-key src kind dst)))) ;; A symmetric kind writes both directions, so children alone read it from either ;; side; a directed kind writes one edge (the inverse is host/blog-in). @@ -219,31 +219,12 @@ (host/blog--del-edge! a b kind) (when (host/blog--kind-symmetric? kind) (host/blog--del-edge! b a kind))))) -;; rebuild the in-memory graph from the durable edge store — called on boot, after -;; the store is pointed at the durable backend. Each "edge:<src>|<kind>|<dst>" key -;; is re-applied directly (both directions of a symmetric kind are stored, so no -;; symmetry re-derivation is needed here). -(define host/blog-load-edges! - (fn () - (for-each - (fn (key) - (let ((body (substr key 5))) ;; drop "edge:" - (let ((p1 (index-of body "|"))) - (when (>= p1 0) - (let ((src (substr body 0 p1)) - (tail (substr body (+ p1 1)))) - (let ((p2 (index-of tail "|"))) - (when (>= p2 0) - (let ((ek (substr tail 0 p2))) - ;; conj/disj are structural (type-algebra operands) — KV-only, - ;; never replayed into the Datalog graph (it re-saturates per query). - (when (not (or (= ek "conj") (= ek "disj"))) - (relations/relate - (host/blog--node src) - (host/blog--node (substr tail (+ p2 1))) - (string->symbol ek))))))))))) - (filter (fn (k) (starts-with? 
k "edge:")) - (persist/backend-kv-keys host/blog-store))))) +;; No-op: the durable KV edge rows ARE the graph and every read walks them directly, so +;; there is no in-memory lib/relations graph to rebuild on boot. (Kept as a callable seam — +;; serve.sh calls it after pointing the store at the durable backend — in case a future +;; index/cache needs warming.) Previously this replayed every edge into lib/relations via +;; relations/relate, which re-saturated the Datalog ruleset per edge: O(edges²) boot cost. +(define host/blog-load-edges! (fn () nil)) ;; nodes -> existing blog slugs: strip "blog:", drop non-blog and deleted targets. ;; Existence is one kv-keys read (host/blog-slugs), NOT a perform per candidate —