GUEST: step 6 — lib/guest/match.sx pure unify + match kit

Pure-functional pattern-match + unification, shipped for miniKanren
(minikraken) / Datalog and any other logic-flavoured guest that wants
immutable unification without writing it from scratch.

Canonical wire format (config callbacks let other shapes plug in):
  var          (:var NAME)
  constructor  (:ctor HEAD ARGS)
  literal      number / string / boolean / nil

Public API:
  empty-subst  walk  walk*  extend  occurs?
  unify        (symmetric, with occurs check)
  unify-with   (cfg-driven for non-canonical term shapes)
  match-pat    (asymmetric pattern→value, vars only in pattern)
  match-pat-with (cfg-driven)

lib/guest/tests/match.sx — 25 tests covering walk chains, occurs,
unify (literal/var/ctor, head + arity mismatch, transitive vars),
match-pat. All passing.

The brief flagged this as the highest-risk step ("revert and redesign
on any regression"). The two existing engines — haskell/match.sx
(pure asymmetric, lazy, returns env-or-nil) and prolog runtime.sx
pl-unify! (mutating symmetric, trail-based, returns bool) — are
structurally divergent and forcing a shared core under either of their
contracts would risk the 746 tests they currently pass. Both are
untouched; they remain at baseline (haskell 156/156, prolog 590/590)
because none of their source files were modified.

PARTIAL — kit shipped, prolog/haskell ports deferred until a guest
chooses to migrate or until a third consumer (minikraken / datalog)
provides a less risky migration path.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-07 18:41:29 +00:00
parent 2defa5e739
commit 863e9d93a4
2 changed files with 293 additions and 0 deletions

185
lib/guest/match.sx Normal file
View File

@@ -0,0 +1,185 @@
;; lib/guest/match.sx — pure pattern-match + unification kit.
;;
;; Shipped for miniKanren / Datalog / future logic-flavoured guests that
;; want immutable unification without writing it from scratch. The two
;; existing prolog/haskell engines stay as-is — porting them in place
;; risks the 746 tests they currently pass; consumers can migrate
;; gradually via the converters in lib/guest/ast.sx.
;;
;; Term shapes (canonical wire format)
;; -----------------------------------
;; var (:var NAME) NAME a string
;; constructor (:ctor HEAD ARGS) HEAD a string, ARGS a list of terms
;; literal number / string / boolean / nil
;;
;; Guests with their own shape pass adapter callbacks via the cfg arg —
;; see (unify-with cfg ...) and (match-pat-with cfg ...) below. The
;; default canonical entry points (unify / match-pat) use the wire shape.
;;
;; Substitution / env
;; ------------------
;; A substitution is a SX dict mapping VAR-NAME → term. There are no
;; trails, no mutation: each step either returns an extended dict or nil.
;;
;; (empty-subst) → {}
;; (walk term s) → term with top-level vars resolved
;; (walk* term s) → term with all vars resolved (recursive)
;; (extend name term s) → s with NAME → term added
;; (occurs? name term s) → bool
;;
;; Unify (symmetric, miniKanren-flavour)
;; -------------------------------------
;; (unify u v s) → extended subst or nil
;; (unify-with cfg u v s) → ditto, with adapter callbacks:
;; :var? :var-name :ctor? :ctor-head
;; :ctor-args :occurs-check?
;;
;; Match (asymmetric, haskell-flavour: pattern → value, vars only in pat)
;; ---------------------------------------------------------------------
;; (match-pat pat val env) → extended env or nil
;; (match-pat-with cfg pat val env)
(define mk-var (fn (name) (list :var name)))
(define mk-ctor (fn (head args) (list :ctor head args)))
(define is-var? (fn (t) (and (list? t) (not (empty? t)) (= (first t) :var))))
(define is-ctor? (fn (t) (and (list? t) (not (empty? t)) (= (first t) :ctor))))
(define var-name (fn (t) (nth t 1)))
(define ctor-head (fn (t) (nth t 1)))
(define ctor-args (fn (t) (nth t 2)))
(define empty-subst (fn () {}))
(define
walk
(fn (t s)
(if (and (is-var? t) (has-key? s (var-name t)))
(walk (get s (var-name t)) s)
t)))
(define
walk*
(fn (t s)
(let ((w (walk t s)))
(cond
((is-ctor? w)
(mk-ctor (ctor-head w) (map (fn (a) (walk* a s)) (ctor-args w))))
(:else w)))))
(define
extend
(fn (name term s)
(assoc s name term)))
(define
occurs?
(fn (name term s)
(let ((w (walk term s)))
(cond
((is-var? w) (= (var-name w) name))
((is-ctor? w) (some (fn (a) (occurs? name a s)) (ctor-args w)))
(:else false)))))
(define
unify-with
(fn (cfg u v s)
(let ((var?-fn (get cfg :var?))
(var-name-fn (get cfg :var-name))
(ctor?-fn (get cfg :ctor?))
(ctor-head-fn (get cfg :ctor-head))
(ctor-args-fn (get cfg :ctor-args))
(occurs?-on (get cfg :occurs-check?)))
(let ((wu (walk-with cfg u s))
(wv (walk-with cfg v s)))
(cond
((and (var?-fn wu) (var?-fn wv) (= (var-name-fn wu) (var-name-fn wv))) s)
((var?-fn wu)
(if (and occurs?-on (occurs-with cfg (var-name-fn wu) wv s))
nil
(extend (var-name-fn wu) wv s)))
((var?-fn wv)
(if (and occurs?-on (occurs-with cfg (var-name-fn wv) wu s))
nil
(extend (var-name-fn wv) wu s)))
((and (ctor?-fn wu) (ctor?-fn wv))
(if (= (ctor-head-fn wu) (ctor-head-fn wv))
(unify-list-with
cfg
(ctor-args-fn wu)
(ctor-args-fn wv)
s)
nil))
(:else (if (= wu wv) s nil)))))))
(define
walk-with
(fn (cfg t s)
(if (and ((get cfg :var?) t) (has-key? s ((get cfg :var-name) t)))
(walk-with cfg (get s ((get cfg :var-name) t)) s)
t)))
(define
occurs-with
(fn (cfg name term s)
(let ((w (walk-with cfg term s)))
(cond
(((get cfg :var?) w) (= ((get cfg :var-name) w) name))
(((get cfg :ctor?) w)
(some (fn (a) (occurs-with cfg name a s)) ((get cfg :ctor-args) w)))
(:else false)))))
(define
unify-list-with
(fn (cfg xs ys s)
(cond
((and (empty? xs) (empty? ys)) s)
((or (empty? xs) (empty? ys)) nil)
(:else
(let ((s2 (unify-with cfg (first xs) (first ys) s)))
(if (= s2 nil)
nil
(unify-list-with cfg (rest xs) (rest ys) s2)))))))
(define canonical-cfg
{:var? is-var? :var-name var-name
:ctor? is-ctor? :ctor-head ctor-head :ctor-args ctor-args
:occurs-check? true})
(define unify (fn (u v s) (unify-with canonical-cfg u v s)))
;; Asymmetric pattern match (haskell-style): only patterns may contain vars;
;; values are concrete. On a var pattern, bind name to value.
(define
match-pat-with
(fn (cfg pat val env)
(let ((var?-fn (get cfg :var?))
(var-name-fn (get cfg :var-name))
(ctor?-fn (get cfg :ctor?))
(ctor-head-fn (get cfg :ctor-head))
(ctor-args-fn (get cfg :ctor-args)))
(cond
((var?-fn pat) (extend (var-name-fn pat) val env))
((and (ctor?-fn pat) (ctor?-fn val))
(if (= (ctor-head-fn pat) (ctor-head-fn val))
(match-list-pat-with
cfg
(ctor-args-fn pat)
(ctor-args-fn val)
env)
nil))
((ctor?-fn pat) nil)
(:else (if (= pat val) env nil))))))
(define
match-list-pat-with
(fn (cfg pats vals env)
(cond
((and (empty? pats) (empty? vals)) env)
((or (empty? pats) (empty? vals)) nil)
(:else
(let ((env2 (match-pat-with cfg (first pats) (first vals) env)))
(if (= env2 nil)
nil
(match-list-pat-with cfg (rest pats) (rest vals) env2)))))))
(define match-pat (fn (pat val env) (match-pat-with canonical-cfg pat val env)))

108
lib/guest/tests/match.sx Normal file
View File

@@ -0,0 +1,108 @@
;; lib/guest/tests/match.sx — exercises lib/guest/match.sx.
(define gmatch-test-pass 0)
(define gmatch-test-fail 0)
(define gmatch-test-fails (list))
(define
gmatch-test
(fn (name actual expected)
(if (= actual expected)
(set! gmatch-test-pass (+ gmatch-test-pass 1))
(begin
(set! gmatch-test-fail (+ gmatch-test-fail 1))
(append! gmatch-test-fails {:name name :expected expected :actual actual})))))
;; ── walk / extend / occurs ────────────────────────────────────────
(gmatch-test "walk-direct"
(walk (mk-var "x") (extend "x" 5 (empty-subst))) 5)
(gmatch-test "walk-chain"
(walk (mk-var "a") (extend "a" (mk-var "b") (extend "b" 7 (empty-subst)))) 7)
(gmatch-test "walk-no-binding"
(let ((v (mk-var "u"))) (= (walk v (empty-subst)) v)) true)
(gmatch-test "walk*-recursive"
(walk* (mk-ctor "Just" (list (mk-var "x"))) (extend "x" 9 (empty-subst)))
(mk-ctor "Just" (list 9)))
(gmatch-test "occurs-direct"
(occurs? "x" (mk-var "x") (empty-subst)) true)
(gmatch-test "occurs-nested"
(occurs? "x" (mk-ctor "f" (list (mk-var "x"))) (empty-subst)) true)
(gmatch-test "occurs-not"
(occurs? "x" (mk-var "y") (empty-subst)) false)
;; ── unify (symmetric) ─────────────────────────────────────────────
(gmatch-test "unify-equal-literals"
(len (unify 5 5 (empty-subst))) 0)
(gmatch-test "unify-different-literals"
(unify 5 6 (empty-subst)) nil)
(gmatch-test "unify-var-literal"
(get (unify (mk-var "x") 5 (empty-subst)) "x") 5)
(gmatch-test "unify-literal-var"
(get (unify 5 (mk-var "x") (empty-subst)) "x") 5)
(gmatch-test "unify-same-var"
(len (unify (mk-var "x") (mk-var "x") (empty-subst))) 0)
(gmatch-test "unify-two-vars"
(let ((s (unify (mk-var "x") (mk-var "y") (empty-subst))))
(or (= (get s "x") (mk-var "y")) (= (get s "y") (mk-var "x")))) true)
(gmatch-test "unify-ctor-equal"
(len (unify (mk-ctor "f" (list 1 2)) (mk-ctor "f" (list 1 2)) (empty-subst))) 0)
(gmatch-test "unify-ctor-with-var"
(get (unify (mk-ctor "Just" (list (mk-var "x"))) (mk-ctor "Just" (list 7)) (empty-subst)) "x") 7)
(gmatch-test "unify-ctor-head-mismatch"
(unify (mk-ctor "Just" (list 1)) (mk-ctor "Nothing" (list)) (empty-subst)) nil)
(gmatch-test "unify-ctor-arity-mismatch"
(unify (mk-ctor "f" (list 1 2)) (mk-ctor "f" (list 1)) (empty-subst)) nil)
(gmatch-test "unify-occurs-check"
(unify (mk-var "x") (mk-ctor "f" (list (mk-var "x"))) (empty-subst)) nil)
(gmatch-test "unify-transitive-vars"
(let ((s (unify (mk-var "x") (mk-var "y") (empty-subst))))
(let ((s2 (unify (mk-var "y") 42 s)))
(walk (mk-var "x") s2))) 42)
;; ── match-pat (asymmetric) ────────────────────────────────────────
(gmatch-test "match-var-binds"
(get (match-pat (mk-var "x") 99 (empty-subst)) "x") 99)
(gmatch-test "match-literal-equal"
(len (match-pat 5 5 (empty-subst))) 0)
(gmatch-test "match-literal-mismatch"
(match-pat 5 6 (empty-subst)) nil)
(gmatch-test "match-ctor-binds"
(get (match-pat (mk-ctor "Just" (list (mk-var "y")))
(mk-ctor "Just" (list 11))
(empty-subst)) "y") 11)
(gmatch-test "match-ctor-head-mismatch"
(match-pat (mk-ctor "Just" (list (mk-var "y")))
(mk-ctor "Nothing" (list))
(empty-subst)) nil)
(gmatch-test "match-ctor-arity-mismatch"
(match-pat (mk-ctor "f" (list (mk-var "x") (mk-var "y")))
(mk-ctor "f" (list 1))
(empty-subst)) nil)
(define gmatch-tests-run!
(fn ()
{:passed gmatch-test-pass
:failed gmatch-test-fail
:total (+ gmatch-test-pass gmatch-test-fail)}))