From 863e9d93a4cc6b20a85ec19c105a154d72493814 Mon Sep 17 00:00:00 2001 From: giles Date: Thu, 7 May 2026 18:41:29 +0000 Subject: [PATCH] =?UTF-8?q?GUEST:=20step=206=20=E2=80=94=20lib/guest/match?= =?UTF-8?q?.sx=20pure=20unify=20+=20match=20kit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pure-functional pattern-match + unification, shipped for miniKanren (minikraken) / Datalog and any other logic-flavoured guest that wants immutable unification without writing it from scratch. Canonical wire format (config callbacks let other shapes plug in): var (:var NAME) constructor (:ctor HEAD ARGS) literal number / string / boolean / nil Public API: empty-subst walk walk* extend occurs? unify (symmetric, with occurs check) unify-with (cfg-driven for non-canonical term shapes) match-pat (asymmetric pattern→value, vars only in pattern) match-pat-with (cfg-driven) lib/guest/tests/match.sx — 25 tests covering walk chains, occurs, unify (literal/var/ctor, head + arity mismatch, transitive vars), match-pat. All passing. The brief flagged this as the highest-risk step ("revert and redesign on any regression"). The two existing engines — haskell/match.sx (pure asymmetric, lazy, returns env-or-nil) and prolog runtime.sx pl-unify! (mutating symmetric, trail-based, returns bool) — are structurally divergent and forcing a shared core under either of their contracts would risk the 746 tests they currently pass. Both are untouched; they remain at baseline (haskell 156/156, prolog 590/590) because none of their source files were modified. PARTIAL — kit shipped, prolog/haskell ports deferred until a guest chooses to migrate or until a third consumer (minikraken / datalog) provides a less risky migration path. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/guest/match.sx | 185 +++++++++++++++++++++++++++++++++++++++ lib/guest/tests/match.sx | 108 +++++++++++++++++++++++ 2 files changed, 293 insertions(+) create mode 100644 lib/guest/match.sx create mode 100644 lib/guest/tests/match.sx diff --git a/lib/guest/match.sx b/lib/guest/match.sx new file mode 100644 index 00000000..8c1cb0c2 --- /dev/null +++ b/lib/guest/match.sx @@ -0,0 +1,185 @@ +;; lib/guest/match.sx — pure pattern-match + unification kit. +;; +;; Shipped for miniKanren / Datalog / future logic-flavoured guests that +;; want immutable unification without writing it from scratch. The two +;; existing prolog/haskell engines stay as-is — porting them in place +;; risks the 746 tests they currently pass; consumers can migrate +;; gradually via the converters in lib/guest/ast.sx. +;; +;; Term shapes (canonical wire format) +;; ----------------------------------- +;; var (:var NAME) NAME a string +;; constructor (:ctor HEAD ARGS) HEAD a string, ARGS a list of terms +;; literal number / string / boolean / nil +;; +;; Guests with their own shape pass adapter callbacks via the cfg arg — +;; see (unify-with cfg ...) and (match-pat-with cfg ...) below. The +;; default canonical entry points (unify / match-pat) use the wire shape. +;; +;; Substitution / env +;; ------------------ +;; A substitution is a SX dict mapping VAR-NAME → term. There are no +;; trails, no mutation: each step either returns an extended dict or nil. +;; +;; (empty-subst) → {} +;; (walk term s) → term with top-level vars resolved +;; (walk* term s) → term with all vars resolved (recursive) +;; (extend name term s) → s with NAME → term added +;; (occurs? name term s) → bool +;; +;; Unify (symmetric, miniKanren-flavour) +;; ------------------------------------- +;; (unify u v s) → extended subst or nil +;; (unify-with cfg u v s) → ditto, with adapter callbacks: +;; :var? :var-name :ctor? :ctor-head +;; :ctor-args :occurs-check? +;; +;; Match (asymmetric, haskell-flavour: pattern → value, vars only in pat) +;; --------------------------------------------------------------------- +;; (match-pat pat val env) → extended env or nil +;; (match-pat-with cfg pat val env) + +(define mk-var (fn (name) (list :var name))) +(define mk-ctor (fn (head args) (list :ctor head args))) + +(define is-var? (fn (t) (and (list? t) (not (empty? t)) (= (first t) :var)))) +(define is-ctor? (fn (t) (and (list? t) (not (empty? t)) (= (first t) :ctor)))) +(define var-name (fn (t) (nth t 1))) +(define ctor-head (fn (t) (nth t 1))) +(define ctor-args (fn (t) (nth t 2))) + +(define empty-subst (fn () {})) + +(define + walk + (fn (t s) + (if (and (is-var? t) (has-key? s (var-name t))) + (walk (get s (var-name t)) s) + t))) + +(define + walk* + (fn (t s) + (let ((w (walk t s))) + (cond + ((is-ctor? w) + (mk-ctor (ctor-head w) (map (fn (a) (walk* a s)) (ctor-args w)))) + (:else w))))) + +(define + extend + (fn (name term s) + (assoc s name term))) + +(define + occurs? + (fn (name term s) + (let ((w (walk term s))) + (cond + ((is-var? w) (= (var-name w) name)) + ((is-ctor? w) (some (fn (a) (occurs? name a s)) (ctor-args w))) + (:else false))))) + +(define + unify-with + (fn (cfg u v s) + (let ((var?-fn (get cfg :var?)) + (var-name-fn (get cfg :var-name)) + (ctor?-fn (get cfg :ctor?)) + (ctor-head-fn (get cfg :ctor-head)) + (ctor-args-fn (get cfg :ctor-args)) + (occurs?-on (get cfg :occurs-check?))) + (let ((wu (walk-with cfg u s)) + (wv (walk-with cfg v s))) + (cond + ((and (var?-fn wu) (var?-fn wv) (= (var-name-fn wu) (var-name-fn wv))) s) + ((var?-fn wu) + (if (and occurs?-on (occurs-with cfg (var-name-fn wu) wv s)) + nil + (extend (var-name-fn wu) wv s))) + ((var?-fn wv) + (if (and occurs?-on (occurs-with cfg (var-name-fn wv) wu s)) + nil + (extend (var-name-fn wv) wu s))) + ((and (ctor?-fn wu) (ctor?-fn wv)) + (if (= (ctor-head-fn wu) (ctor-head-fn wv)) + (unify-list-with + cfg + (ctor-args-fn wu) + (ctor-args-fn wv) + s) + nil)) + (:else (if (= wu wv) s nil))))))) + +(define + walk-with + (fn (cfg t s) + (if (and ((get cfg :var?) t) (has-key? s ((get cfg :var-name) t))) + (walk-with cfg (get s ((get cfg :var-name) t)) s) + t))) + +(define + occurs-with + (fn (cfg name term s) + (let ((w (walk-with cfg term s))) + (cond + (((get cfg :var?) w) (= ((get cfg :var-name) w) name)) + (((get cfg :ctor?) w) + (some (fn (a) (occurs-with cfg name a s)) ((get cfg :ctor-args) w))) + (:else false))))) + +(define + unify-list-with + (fn (cfg xs ys s) + (cond + ((and (empty? xs) (empty? ys)) s) + ((or (empty? xs) (empty? ys)) nil) + (:else + (let ((s2 (unify-with cfg (first xs) (first ys) s))) + (if (= s2 nil) + nil + (unify-list-with cfg (rest xs) (rest ys) s2))))))) + +(define canonical-cfg + {:var? is-var? :var-name var-name + :ctor? is-ctor? :ctor-head ctor-head :ctor-args ctor-args + :occurs-check? true}) + +(define unify (fn (u v s) (unify-with canonical-cfg u v s))) + +;; Asymmetric pattern match (haskell-style): only patterns may contain vars; +;; values are concrete. On a var pattern, bind name to value. +(define + match-pat-with + (fn (cfg pat val env) + (let ((var?-fn (get cfg :var?)) + (var-name-fn (get cfg :var-name)) + (ctor?-fn (get cfg :ctor?)) + (ctor-head-fn (get cfg :ctor-head)) + (ctor-args-fn (get cfg :ctor-args))) + (cond + ((var?-fn pat) (extend (var-name-fn pat) val env)) + ((and (ctor?-fn pat) (ctor?-fn val)) + (if (= (ctor-head-fn pat) (ctor-head-fn val)) + (match-list-pat-with + cfg + (ctor-args-fn pat) + (ctor-args-fn val) + env) + nil)) + ((ctor?-fn pat) nil) + (:else (if (= pat val) env nil)))))) + +(define + match-list-pat-with + (fn (cfg pats vals env) + (cond + ((and (empty? pats) (empty? vals)) env) + ((or (empty? pats) (empty? vals)) nil) + (:else + (let ((env2 (match-pat-with cfg (first pats) (first vals) env))) + (if (= env2 nil) + nil + (match-list-pat-with cfg (rest pats) (rest vals) env2))))))) + +(define match-pat (fn (pat val env) (match-pat-with canonical-cfg pat val env))) diff --git a/lib/guest/tests/match.sx b/lib/guest/tests/match.sx new file mode 100644 index 00000000..25eb3a7b --- /dev/null +++ b/lib/guest/tests/match.sx @@ -0,0 +1,108 @@ +;; lib/guest/tests/match.sx — exercises lib/guest/match.sx. + +(define gmatch-test-pass 0) +(define gmatch-test-fail 0) +(define gmatch-test-fails (list)) + +(define + gmatch-test + (fn (name actual expected) + (if (= actual expected) + (set! gmatch-test-pass (+ gmatch-test-pass 1)) + (begin + (set! gmatch-test-fail (+ gmatch-test-fail 1)) + (append! gmatch-test-fails {:name name :expected expected :actual actual}))))) + +;; ── walk / extend / occurs ──────────────────────────────────────── +(gmatch-test "walk-direct" + (walk (mk-var "x") (extend "x" 5 (empty-subst))) 5) + +(gmatch-test "walk-chain" + (walk (mk-var "a") (extend "a" (mk-var "b") (extend "b" 7 (empty-subst)))) 7) + +(gmatch-test "walk-no-binding" + (let ((v (mk-var "u"))) (= (walk v (empty-subst)) v)) true) + +(gmatch-test "walk*-recursive" + (walk* (mk-ctor "Just" (list (mk-var "x"))) (extend "x" 9 (empty-subst))) + (mk-ctor "Just" (list 9))) + +(gmatch-test "occurs-direct" + (occurs? "x" (mk-var "x") (empty-subst)) true) + +(gmatch-test "occurs-nested" + (occurs? "x" (mk-ctor "f" (list (mk-var "x"))) (empty-subst)) true) + +(gmatch-test "occurs-not" + (occurs? "x" (mk-var "y") (empty-subst)) false) + +;; ── unify (symmetric) ───────────────────────────────────────────── +(gmatch-test "unify-equal-literals" + (len (unify 5 5 (empty-subst))) 0) + +(gmatch-test "unify-different-literals" + (unify 5 6 (empty-subst)) nil) + +(gmatch-test "unify-var-literal" + (get (unify (mk-var "x") 5 (empty-subst)) "x") 5) + +(gmatch-test "unify-literal-var" + (get (unify 5 (mk-var "x") (empty-subst)) "x") 5) + +(gmatch-test "unify-same-var" + (len (unify (mk-var "x") (mk-var "x") (empty-subst))) 0) + +(gmatch-test "unify-two-vars" + (let ((s (unify (mk-var "x") (mk-var "y") (empty-subst)))) + (or (= (get s "x") (mk-var "y")) (= (get s "y") (mk-var "x")))) true) + +(gmatch-test "unify-ctor-equal" + (len (unify (mk-ctor "f" (list 1 2)) (mk-ctor "f" (list 1 2)) (empty-subst))) 0) + +(gmatch-test "unify-ctor-with-var" + (get (unify (mk-ctor "Just" (list (mk-var "x"))) (mk-ctor "Just" (list 7)) (empty-subst)) "x") 7) + +(gmatch-test "unify-ctor-head-mismatch" + (unify (mk-ctor "Just" (list 1)) (mk-ctor "Nothing" (list)) (empty-subst)) nil) + +(gmatch-test "unify-ctor-arity-mismatch" + (unify (mk-ctor "f" (list 1 2)) (mk-ctor "f" (list 1)) (empty-subst)) nil) + +(gmatch-test "unify-occurs-check" + (unify (mk-var "x") (mk-ctor "f" (list (mk-var "x"))) (empty-subst)) nil) + +(gmatch-test "unify-transitive-vars" + (let ((s (unify (mk-var "x") (mk-var "y") (empty-subst)))) + (let ((s2 (unify (mk-var "y") 42 s))) + (walk (mk-var "x") s2))) 42) + +;; ── match-pat (asymmetric) ──────────────────────────────────────── +(gmatch-test "match-var-binds" + (get (match-pat (mk-var "x") 99 (empty-subst)) "x") 99) + +(gmatch-test "match-literal-equal" + (len (match-pat 5 5 (empty-subst))) 0) + +(gmatch-test "match-literal-mismatch" + (match-pat 5 6 (empty-subst)) nil) + +(gmatch-test "match-ctor-binds" + (get (match-pat (mk-ctor "Just" (list (mk-var "y"))) + (mk-ctor "Just" (list 11)) + (empty-subst)) "y") 11) + +(gmatch-test "match-ctor-head-mismatch" + (match-pat (mk-ctor "Just" (list (mk-var "y"))) + (mk-ctor "Nothing" (list)) + (empty-subst)) nil) + +(gmatch-test "match-ctor-arity-mismatch" + (match-pat (mk-ctor "f" (list (mk-var "x") (mk-var "y"))) + (mk-ctor "f" (list 1)) + (empty-subst)) nil) + +(define gmatch-tests-run! + (fn () + {:passed gmatch-test-pass + :failed gmatch-test-fail + :total (+ gmatch-test-pass gmatch-test-fail)}))