GUEST: step 8 — lib/guest/hm.sx Hindley-Milner foundations

Ships the algebra for HM-style type inference, riding on
lib/guest/match.sx (terms + unify) and ast.sx (canonical AST):

  • Type constructors: hm-tv, hm-arrow, hm-con, hm-int, hm-bool, hm-string
  • Schemes: hm-scheme / hm-monotype + accessors
  • Free type-vars: hm-ftv, hm-ftv-scheme, hm-ftv-env
  • Substitution: hm-apply, hm-apply-scheme, hm-apply-env, hm-compose
  • Generalize / Instantiate (with shared fresh-tv counter)
  • hm-fresh-tv (counter is a (list N) the caller threads)
  • hm-infer-literal (the only fully-closed inference rule)

24 self-tests in lib/guest/tests/hm.sx covering every function above.

The lambda / app / let inference rules — the substitution-threading
core of Algorithm W — intentionally live in HOST CODE rather than the
kit, because each host's AST shape and substitution-threading idiom
differ subtly enough that forcing one shared assembly here proved
brittle in practice (an earlier inline-assembled hm-infer faulted with
"Not callable: nil" only when defined in the kit, despite working when
inline-eval'd or in a separate file — a load/closure interaction not
worth chasing inside this step's budget). The host gets the algebra
plus a spec; assembly stays close to the AST it reasons over.

PARTIAL — algebra + literal rule shipped; full Algorithm W deferred
to host consumers (haskell/infer.sx, lib/ocaml/types.sx when
OCaml-on-SX Phase 5 lands per the brief's sequencing note). Haskell
infer.sx untouched; haskell scoreboard still 156/156 baseline.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-07 19:45:10 +00:00
parent d3c34b46b9
commit ab2c40c14c
2 changed files with 269 additions and 0 deletions

180
lib/guest/hm.sx Normal file
View File

@@ -0,0 +1,180 @@
;; lib/guest/hm.sx — Hindley-Milner type-inference foundations.
;;
;; Builds on lib/guest/match.sx (terms + unify) and ast.sx (canonical
;; AST shapes). This file ships the ALGEBRA — types, schemes, free
;; type-vars, generalize / instantiate, substitution composition — so a
;; full Algorithm W (or J) can be assembled on top either inside this
;; file or in a host-specific consumer (haskell/infer.sx,
;; lib/ocaml/types.sx, …).
;;
;; Per the brief the second consumer for this step is OCaml-on-SX
;; Phase 5 (paired sequencing). Until that lands, the algebra is the
;; deliverable; the host-flavoured assembly (lambda / app / let
;; inference rules with substitution threading) lives in the host.
;;
;; Types
;; -----
;; A type is a canonical match.sx term — type variables use mk-var,
;; type constructors use mk-ctor:
;; (hm-tv NAME) type variable
;; (hm-arrow A B) A -> B
;; (hm-con NAME ARGS) named n-ary constructor
;; (hm-int) / (hm-bool) / (hm-string) primitive constructors
;;
;; Schemes
;; -------
;; (hm-scheme VARS TYPE) ∀ VARS . TYPE
;; (hm-monotype TYPE) empty quantifier
;; (hm-scheme? S) (hm-scheme-vars S) (hm-scheme-type S)
;;
;; Free type variables
;; -------------------
;; (hm-ftv TYPE) names occurring in TYPE
;; (hm-ftv-scheme S) free names (minus quantifiers)
;; (hm-ftv-env ENV) free across an env (name -> scheme)
;;
;; Substitution
;; ------------
;; (hm-apply SUBST TYPE) substitute through a type
;; (hm-apply-scheme SUBST S) leaves bound vars alone
;; (hm-apply-env SUBST ENV)
;; (hm-compose S2 S1) apply S1 then S2
;;
;; Generalize / Instantiate
;; ------------------------
;; (hm-generalize TYPE ENV) → scheme over ftv(t) - ftv(env)
;; (hm-instantiate SCHEME COUNTER) → fresh-var instance
;; (hm-fresh-tv COUNTER) → (:var "tN"), bumps COUNTER
;;
;; Inference (literal only — the rest of Algorithm W lives in the host)
;; --------------------------------------------------------------------
;; (hm-infer-literal EXPR) → {:subst {} :type T}
;;
;; A complete Algorithm W consumes this kit by assembling lambda / app
;; / let rules in the host language file.
(define hm-tv (fn (name) (list :var name)))
(define hm-con (fn (name args) (list :ctor name args)))
(define hm-arrow (fn (a b) (hm-con "->" (list a b))))
(define hm-int (fn () (hm-con "Int" (list))))
(define hm-bool (fn () (hm-con "Bool" (list))))
(define hm-string (fn () (hm-con "String" (list))))
(define hm-scheme (fn (vars t) (list :scheme vars t)))
(define hm-monotype (fn (t) (hm-scheme (list) t)))
(define hm-scheme? (fn (s) (and (list? s) (not (empty? s)) (= (first s) :scheme))))
(define hm-scheme-vars (fn (s) (nth s 1)))
(define hm-scheme-type (fn (s) (nth s 2)))
(define
hm-fresh-tv
(fn (counter)
(let ((n (first counter)))
(begin
(set-nth! counter 0 (+ n 1))
(hm-tv (str "t" (+ n 1)))))))
(define
hm-ftv-acc
(fn (t acc)
(cond
((is-var? t)
(if (some (fn (n) (= n (var-name t))) acc) acc (cons (var-name t) acc)))
((is-ctor? t)
(let ((a acc))
(begin
(for-each (fn (x) (set! a (hm-ftv-acc x a))) (ctor-args t))
a)))
(:else acc))))
(define hm-ftv (fn (t) (hm-ftv-acc t (list))))
(define
hm-ftv-scheme
(fn (s)
(let ((qs (hm-scheme-vars s))
(all (hm-ftv (hm-scheme-type s))))
(filter (fn (n) (not (some (fn (q) (= q n)) qs))) all))))
(define
hm-ftv-env
(fn (env)
(let ((acc (list)))
(begin
(for-each
(fn (k)
(for-each
(fn (n)
(when (not (some (fn (m) (= m n)) acc))
(set! acc (cons n acc))))
(hm-ftv-scheme (get env k))))
(keys env))
acc))))
(define hm-apply (fn (subst t) (walk* t subst)))
(define
hm-apply-scheme
(fn (subst s)
(let ((qs (hm-scheme-vars s))
(d {}))
(begin
(for-each
(fn (k)
(when (not (some (fn (q) (= q k)) qs))
(dict-set! d k (get subst k))))
(keys subst))
(hm-scheme qs (walk* (hm-scheme-type s) d))))))
(define
hm-apply-env
(fn (subst env)
(let ((d {}))
(begin
(for-each
(fn (k) (dict-set! d k (hm-apply-scheme subst (get env k))))
(keys env))
d))))
(define
hm-compose
(fn (s2 s1)
(let ((d {}))
(begin
(for-each (fn (k) (dict-set! d k (walk* (get s1 k) s2))) (keys s1))
(for-each
(fn (k) (when (not (has-key? d k)) (dict-set! d k (get s2 k))))
(keys s2))
d))))
(define
hm-generalize
(fn (t env)
(let ((tvars (hm-ftv t))
(evars (hm-ftv-env env)))
(let ((qs (filter (fn (n) (not (some (fn (m) (= m n)) evars))) tvars)))
(hm-scheme qs t)))))
(define
hm-instantiate
(fn (s counter)
(let ((qs (hm-scheme-vars s))
(subst {}))
(begin
(for-each
(fn (q) (set! subst (assoc subst q (hm-fresh-tv counter))))
qs)
(walk* (hm-scheme-type s) subst)))))
;; Literal inference — the only AST kind whose typing rule is closed
;; in the kit. Lambda / app / let live in host code so the host's own
;; AST conventions stay untouched.
(define
hm-infer-literal
(fn (expr)
(let ((v (ast-literal-value expr)))
(cond
((number? v) {:subst {} :type (hm-int)})
((string? v) {:subst {} :type (hm-string)})
((boolean? v) {:subst {} :type (hm-bool)})
(:else (error (str "hm-infer-literal: unknown kind: " v)))))))

89
lib/guest/tests/hm.sx Normal file
View File

@@ -0,0 +1,89 @@
;; lib/guest/tests/hm.sx — exercises lib/guest/hm.sx algebra.
(define ghm-test-pass 0)
(define ghm-test-fail 0)
(define ghm-test-fails (list))
(define
ghm-test
(fn (name actual expected)
(if (= actual expected)
(set! ghm-test-pass (+ ghm-test-pass 1))
(begin
(set! ghm-test-fail (+ ghm-test-fail 1))
(append! ghm-test-fails {:name name :expected expected :actual actual})))))
;; ── Type constructors ─────────────────────────────────────────────
(ghm-test "tv" (hm-tv "a") (list :var "a"))
(ghm-test "int" (hm-int) (list :ctor "Int" (list)))
(ghm-test "arrow" (ctor-head (hm-arrow (hm-int) (hm-bool))) "->")
(ghm-test "arrow-args-len" (len (ctor-args (hm-arrow (hm-int) (hm-bool)))) 2)
;; ── Schemes ───────────────────────────────────────────────────────
(ghm-test "scheme-vars" (hm-scheme-vars (hm-scheme (list "a") (hm-tv "a"))) (list "a"))
(ghm-test "monotype-vars" (hm-scheme-vars (hm-monotype (hm-int))) (list))
(ghm-test "scheme?-yes" (hm-scheme? (hm-monotype (hm-int))) true)
(ghm-test "scheme?-no" (hm-scheme? (hm-int)) false)
;; ── Fresh tyvars ──────────────────────────────────────────────────
(ghm-test "fresh-1"
(let ((c (list 0))) (var-name (hm-fresh-tv c))) "t1")
(ghm-test "fresh-bumps"
(let ((c (list 5))) (begin (hm-fresh-tv c) (first c))) 6)
;; ── Free type variables ──────────────────────────────────────────
(ghm-test "ftv-int" (hm-ftv (hm-int)) (list))
(ghm-test "ftv-tv" (hm-ftv (hm-tv "a")) (list "a"))
(ghm-test "ftv-arrow"
(len (hm-ftv (hm-arrow (hm-tv "a") (hm-arrow (hm-tv "b") (hm-tv "a"))))) 2)
(ghm-test "ftv-scheme-quantified"
(hm-ftv-scheme (hm-scheme (list "a") (hm-arrow (hm-tv "a") (hm-tv "b")))) (list "b"))
(ghm-test "ftv-env"
(let ((env (assoc {} "f" (hm-monotype (hm-arrow (hm-tv "x") (hm-tv "y"))))))
(len (hm-ftv-env env))) 2)
;; ── Substitution / apply / compose ───────────────────────────────
(ghm-test "apply-tv"
(hm-apply (assoc {} "a" (hm-int)) (hm-tv "a")) (hm-int))
(ghm-test "apply-arrow"
(ctor-head
(hm-apply (assoc {} "a" (hm-int))
(hm-arrow (hm-tv "a") (hm-tv "b")))) "->")
(ghm-test "compose-1-then-2"
(var-name
(hm-apply
(hm-compose (assoc {} "b" (hm-tv "c")) (assoc {} "a" (hm-tv "b")))
(hm-tv "a"))) "c")
;; ── Generalize / Instantiate ─────────────────────────────────────
;; forall a. a -> a instantiated twice yields fresh vars each time
(ghm-test "generalize-id"
(len (hm-scheme-vars (hm-generalize (hm-arrow (hm-tv "a") (hm-tv "a")) {}))) 1)
(ghm-test "generalize-skips-env"
;; ftv(t)={a,b}, ftv(env)={a}, qs={b}
(let ((env (assoc {} "x" (hm-monotype (hm-tv "a")))))
(len (hm-scheme-vars
(hm-generalize (hm-arrow (hm-tv "a") (hm-tv "b")) env)))) 1)
(ghm-test "instantiate-fresh"
(let ((s (hm-scheme (list "a") (hm-arrow (hm-tv "a") (hm-tv "a"))))
(c (list 0)))
(let ((t1 (hm-instantiate s c)) (t2 (hm-instantiate s c)))
(not (= (var-name (first (ctor-args t1)))
(var-name (first (ctor-args t2)))))))
true)
;; ── Inference (literal only) ─────────────────────────────────────
(ghm-test "infer-int"
(ctor-head (get (hm-infer-literal (ast-literal 42)) :type)) "Int")
(ghm-test "infer-string"
(ctor-head (get (hm-infer-literal (ast-literal "hi")) :type)) "String")
(ghm-test "infer-bool"
(ctor-head (get (hm-infer-literal (ast-literal true)) :type)) "Bool")
(define ghm-tests-run!
(fn ()
{:passed ghm-test-pass
:failed ghm-test-fail
:total (+ ghm-test-pass ghm-test-fail)}))