diff --git a/lib/guest/hm.sx b/lib/guest/hm.sx new file mode 100644 index 00000000..602c99a7 --- /dev/null +++ b/lib/guest/hm.sx @@ -0,0 +1,180 @@ +;; lib/guest/hm.sx — Hindley-Milner type-inference foundations. +;; +;; Builds on lib/guest/match.sx (terms + unify) and ast.sx (canonical +;; AST shapes). This file ships the ALGEBRA — types, schemes, free +;; type-vars, generalize / instantiate, substitution composition — so a +;; full Algorithm W (or J) can be assembled on top either inside this +;; file or in a host-specific consumer (haskell/infer.sx, +;; lib/ocaml/types.sx, …). +;; +;; Per the brief the second consumer for this step is OCaml-on-SX +;; Phase 5 (paired sequencing). Until that lands, the algebra is the +;; deliverable; the host-flavoured assembly (lambda / app / let +;; inference rules with substitution threading) lives in the host. +;; +;; Types +;; ----- +;; A type is a canonical match.sx term — type variables use mk-var, +;; type constructors use mk-ctor: +;; (hm-tv NAME) type variable +;; (hm-arrow A B) A -> B +;; (hm-con NAME ARGS) named n-ary constructor +;; (hm-int) / (hm-bool) / (hm-string) primitive constructors +;; +;; Schemes +;; ------- +;; (hm-scheme VARS TYPE) ∀ VARS . TYPE +;; (hm-monotype TYPE) empty quantifier +;; (hm-scheme? S) (hm-scheme-vars S) (hm-scheme-type S) +;; +;; Free type variables +;; ------------------- +;; (hm-ftv TYPE) names occurring in TYPE +;; (hm-ftv-scheme S) free names (minus quantifiers) +;; (hm-ftv-env ENV) free across an env (name -> scheme) +;; +;; Substitution +;; ------------ +;; (hm-apply SUBST TYPE) substitute through a type +;; (hm-apply-scheme SUBST S) leaves bound vars alone +;; (hm-apply-env SUBST ENV) +;; (hm-compose S2 S1) apply S1 then S2 +;; +;; Generalize / Instantiate +;; ------------------------ +;; (hm-generalize TYPE ENV) → scheme over ftv(t) - ftv(env) +;; (hm-instantiate SCHEME COUNTER) → fresh-var instance +;; (hm-fresh-tv COUNTER) → (:var "tN"), bumps COUNTER +;; +;; Inference (literal only — the rest of Algorithm W lives in the host) +;; -------------------------------------------------------------------- +;; (hm-infer-literal EXPR) → {:subst {} :type T} +;; +;; A complete Algorithm W consumes this kit by assembling lambda / app +;; / let rules in the host language file. + +(define hm-tv (fn (name) (list :var name))) +(define hm-con (fn (name args) (list :ctor name args))) +(define hm-arrow (fn (a b) (hm-con "->" (list a b)))) +(define hm-int (fn () (hm-con "Int" (list)))) +(define hm-bool (fn () (hm-con "Bool" (list)))) +(define hm-string (fn () (hm-con "String" (list)))) + +(define hm-scheme (fn (vars t) (list :scheme vars t))) +(define hm-monotype (fn (t) (hm-scheme (list) t))) +(define hm-scheme? (fn (s) (and (list? s) (not (empty? s)) (= (first s) :scheme)))) +(define hm-scheme-vars (fn (s) (nth s 1))) +(define hm-scheme-type (fn (s) (nth s 2))) + +(define + hm-fresh-tv + (fn (counter) + (let ((n (first counter))) + (begin + (set-nth! counter 0 (+ n 1)) + (hm-tv (str "t" (+ n 1))))))) + +(define + hm-ftv-acc + (fn (t acc) + (cond + ((is-var? t) + (if (some (fn (n) (= n (var-name t))) acc) acc (cons (var-name t) acc))) + ((is-ctor? t) + (let ((a acc)) + (begin + (for-each (fn (x) (set! a (hm-ftv-acc x a))) (ctor-args t)) + a))) + (:else acc)))) + +(define hm-ftv (fn (t) (hm-ftv-acc t (list)))) + +(define + hm-ftv-scheme + (fn (s) + (let ((qs (hm-scheme-vars s)) + (all (hm-ftv (hm-scheme-type s)))) + (filter (fn (n) (not (some (fn (q) (= q n)) qs))) all)))) + +(define + hm-ftv-env + (fn (env) + (let ((acc (list))) + (begin + (for-each + (fn (k) + (for-each + (fn (n) + (when (not (some (fn (m) (= m n)) acc)) + (set! acc (cons n acc)))) + (hm-ftv-scheme (get env k)))) + (keys env)) + acc)))) + +(define hm-apply (fn (subst t) (walk* t subst))) + +(define + hm-apply-scheme + (fn (subst s) + (let ((qs (hm-scheme-vars s)) + (d {})) + (begin + (for-each + (fn (k) + (when (not (some (fn (q) (= q k)) qs)) + (dict-set! d k (get subst k)))) + (keys subst)) + (hm-scheme qs (walk* (hm-scheme-type s) d)))))) + +(define + hm-apply-env + (fn (subst env) + (let ((d {})) + (begin + (for-each + (fn (k) (dict-set! d k (hm-apply-scheme subst (get env k)))) + (keys env)) + d)))) + +(define + hm-compose + (fn (s2 s1) + (let ((d {})) + (begin + (for-each (fn (k) (dict-set! d k (walk* (get s1 k) s2))) (keys s1)) + (for-each + (fn (k) (when (not (has-key? d k)) (dict-set! d k (get s2 k)))) + (keys s2)) + d)))) + +(define + hm-generalize + (fn (t env) + (let ((tvars (hm-ftv t)) + (evars (hm-ftv-env env))) + (let ((qs (filter (fn (n) (not (some (fn (m) (= m n)) evars))) tvars))) + (hm-scheme qs t))))) + +(define + hm-instantiate + (fn (s counter) + (let ((qs (hm-scheme-vars s)) + (subst {})) + (begin + (for-each + (fn (q) (set! subst (assoc subst q (hm-fresh-tv counter)))) + qs) + (walk* (hm-scheme-type s) subst))))) + +;; Literal inference — the only AST kind whose typing rule is closed +;; in the kit. Lambda / app / let live in host code so the host's own +;; AST conventions stay untouched. +(define + hm-infer-literal + (fn (expr) + (let ((v (ast-literal-value expr))) + (cond + ((number? v) {:subst {} :type (hm-int)}) + ((string? v) {:subst {} :type (hm-string)}) + ((boolean? v) {:subst {} :type (hm-bool)}) + (:else (error (str "hm-infer-literal: unknown kind: " v))))))) diff --git a/lib/guest/tests/hm.sx b/lib/guest/tests/hm.sx new file mode 100644 index 00000000..cf2f0f31 --- /dev/null +++ b/lib/guest/tests/hm.sx @@ -0,0 +1,89 @@ +;; lib/guest/tests/hm.sx — exercises lib/guest/hm.sx algebra. + +(define ghm-test-pass 0) +(define ghm-test-fail 0) +(define ghm-test-fails (list)) + +(define + ghm-test + (fn (name actual expected) + (if (= actual expected) + (set! ghm-test-pass (+ ghm-test-pass 1)) + (begin + (set! ghm-test-fail (+ ghm-test-fail 1)) + (append! ghm-test-fails {:name name :expected expected :actual actual}))))) + +;; ── Type constructors ───────────────────────────────────────────── +(ghm-test "tv" (hm-tv "a") (list :var "a")) +(ghm-test "int" (hm-int) (list :ctor "Int" (list))) +(ghm-test "arrow" (ctor-head (hm-arrow (hm-int) (hm-bool))) "->") +(ghm-test "arrow-args-len" (len (ctor-args (hm-arrow (hm-int) (hm-bool)))) 2) + +;; ── Schemes ─────────────────────────────────────────────────────── +(ghm-test "scheme-vars" (hm-scheme-vars (hm-scheme (list "a") (hm-tv "a"))) (list "a")) +(ghm-test "monotype-vars" (hm-scheme-vars (hm-monotype (hm-int))) (list)) +(ghm-test "scheme?-yes" (hm-scheme? (hm-monotype (hm-int))) true) +(ghm-test "scheme?-no" (hm-scheme? (hm-int)) false) + +;; ── Fresh tyvars ────────────────────────────────────────────────── +(ghm-test "fresh-1" + (let ((c (list 0))) (var-name (hm-fresh-tv c))) "t1") +(ghm-test "fresh-bumps" + (let ((c (list 5))) (begin (hm-fresh-tv c) (first c))) 6) + +;; ── Free type variables ────────────────────────────────────────── +(ghm-test "ftv-int" (hm-ftv (hm-int)) (list)) +(ghm-test "ftv-tv" (hm-ftv (hm-tv "a")) (list "a")) +(ghm-test "ftv-arrow" + (len (hm-ftv (hm-arrow (hm-tv "a") (hm-arrow (hm-tv "b") (hm-tv "a"))))) 2) +(ghm-test "ftv-scheme-quantified" + (hm-ftv-scheme (hm-scheme (list "a") (hm-arrow (hm-tv "a") (hm-tv "b")))) (list "b")) +(ghm-test "ftv-env" + (let ((env (assoc {} "f" (hm-monotype (hm-arrow (hm-tv "x") (hm-tv "y")))))) + (len (hm-ftv-env env))) 2) + +;; ── Substitution / apply / compose ─────────────────────────────── +(ghm-test "apply-tv" + (hm-apply (assoc {} "a" (hm-int)) (hm-tv "a")) (hm-int)) +(ghm-test "apply-arrow" + (ctor-head + (hm-apply (assoc {} "a" (hm-int)) + (hm-arrow (hm-tv "a") (hm-tv "b")))) "->") +(ghm-test "compose-1-then-2" + (var-name + (hm-apply + (hm-compose (assoc {} "b" (hm-tv "c")) (assoc {} "a" (hm-tv "b"))) + (hm-tv "a"))) "c") + +;; ── Generalize / Instantiate ───────────────────────────────────── +;; forall a. a -> a instantiated twice yields fresh vars each time +(ghm-test "generalize-id" + (len (hm-scheme-vars (hm-generalize (hm-arrow (hm-tv "a") (hm-tv "a")) {}))) 1) + +(ghm-test "generalize-skips-env" + ;; ftv(t)={a,b}, ftv(env)={a}, qs={b} + (let ((env (assoc {} "x" (hm-monotype (hm-tv "a"))))) + (len (hm-scheme-vars + (hm-generalize (hm-arrow (hm-tv "a") (hm-tv "b")) env)))) 1) + +(ghm-test "instantiate-fresh" + (let ((s (hm-scheme (list "a") (hm-arrow (hm-tv "a") (hm-tv "a")))) + (c (list 0))) + (let ((t1 (hm-instantiate s c)) (t2 (hm-instantiate s c))) + (not (= (var-name (first (ctor-args t1))) + (var-name (first (ctor-args t2))))))) + true) + +;; ── Inference (literal only) ───────────────────────────────────── +(ghm-test "infer-int" + (ctor-head (get (hm-infer-literal (ast-literal 42)) :type)) "Int") +(ghm-test "infer-string" + (ctor-head (get (hm-infer-literal (ast-literal "hi")) :type)) "String") +(ghm-test "infer-bool" + (ctor-head (get (hm-infer-literal (ast-literal true)) :type)) "Bool") + +(define ghm-tests-run! + (fn () + {:passed ghm-test-pass + :failed ghm-test-fail + :total (+ ghm-test-pass ghm-test-fail)}))