ocaml: phase 5 HM type inference — closes lib-guest step 8 (+14 tests, 265 total)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 53s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 53s
OCaml-on-SX is the deferred second consumer for lib/guest/hm.sx step 8.
lib/ocaml/infer.sx assembles Algorithm W on top of the shipped algebra:
- Var: lookup + hm-instantiate.
- Fun: fresh-tv per param, auto-curried via recursion.
- App: unify against hm-arrow, fresh-tv for result.
- Let: generalize rhs over (ftv(t) - ftv(env)) — let-polymorphism.
- If: unify cond with Bool, both branches with each other.
- Op (+, =, <, etc.): builtin signatures (int*int->int monomorphic,
=/<> polymorphic 'a->'a->bool).
Tests pass for: literals, fun x -> x : 'a -> 'a, let id ... id 5/id true,
fun f x -> f (f x) : ('a -> 'a) -> 'a -> 'a (twice).
Pending: tuples, lists, pattern matching, let-rec, modules in HM.
This commit is contained in:
209
lib/ocaml/infer.sx
Normal file
209
lib/ocaml/infer.sx
Normal file
@@ -0,0 +1,209 @@
|
||||
;; lib/ocaml/infer.sx — Algorithm W type inference for OCaml-on-SX.
|
||||
;;
|
||||
;; Consumes lib/guest/hm.sx (algebra) and lib/guest/match.sx (unify) per
|
||||
;; the Phase 5 sequencing. The kit ships fresh-tv, generalize,
|
||||
;; instantiate, and substitution composition; this file assembles the
|
||||
;; lambda / app / let / if rules of Algorithm W against the OCaml AST.
|
||||
;;
|
||||
;; Coverage in this slice (atoms + core forms):
|
||||
;; :int :float :string :char :bool :unit :var :fun :app :let :if
|
||||
;; :op (with builtin signatures for +, -, *, /, mod, comparisons, &&, ||)
|
||||
;;
|
||||
;; Out of scope: pattern matching, tuples, lists (need product/list types
|
||||
;; first), records, modules, ADTs, let-rec.
|
||||
;;
|
||||
;; Inference state:
|
||||
;; env — dict: name → scheme
|
||||
;; counter — one-element list (mutable cell) used by hm-fresh-tv
|
||||
;;
|
||||
;; Returned value: {:subst S :type T}.
|
||||
|
||||
(define ocaml-hm-counter (fn () (list 0)))
|
||||
|
||||
(define ocaml-hm-empty-subst (fn () {}))
|
||||
|
||||
(define ocaml-hm-builtin-env
|
||||
(fn ()
|
||||
(let ((int-int-int (hm-arrow (hm-int) (hm-arrow (hm-int) (hm-int))))
|
||||
(int-int-bool (hm-arrow (hm-int) (hm-arrow (hm-int) (hm-bool))))
|
||||
(bool-bool-bool (hm-arrow (hm-bool) (hm-arrow (hm-bool) (hm-bool))))
|
||||
(str-str-str (hm-arrow (hm-string) (hm-arrow (hm-string) (hm-string))))
|
||||
(any-any-bool
|
||||
(let ((a (hm-tv "a")))
|
||||
(hm-scheme (list "a")
|
||||
(hm-arrow a (hm-arrow a (hm-bool))))))
|
||||
(a->a
|
||||
(let ((a (hm-tv "a")))
|
||||
(hm-scheme (list "a") (hm-arrow a a)))))
|
||||
{"+" (hm-monotype int-int-int)
|
||||
"-" (hm-monotype int-int-int)
|
||||
"*" (hm-monotype int-int-int)
|
||||
"/" (hm-monotype int-int-int)
|
||||
"mod" (hm-monotype int-int-int)
|
||||
"%" (hm-monotype int-int-int)
|
||||
"**" (hm-monotype int-int-int)
|
||||
"<" (hm-monotype int-int-bool)
|
||||
">" (hm-monotype int-int-bool)
|
||||
"<=" (hm-monotype int-int-bool)
|
||||
">=" (hm-monotype int-int-bool)
|
||||
"=" any-any-bool
|
||||
"<>" any-any-bool
|
||||
"&&" (hm-monotype bool-bool-bool)
|
||||
"||" (hm-monotype bool-bool-bool)
|
||||
"^" (hm-monotype str-str-str)
|
||||
"not" (hm-monotype (hm-arrow (hm-bool) (hm-bool)))
|
||||
"succ" (hm-monotype (hm-arrow (hm-int) (hm-int)))
|
||||
"pred" (hm-monotype (hm-arrow (hm-int) (hm-int)))
|
||||
"abs" (hm-monotype (hm-arrow (hm-int) (hm-int)))})))
|
||||
|
||||
(define ocaml-infer (fn (expr env counter) nil))
|
||||
|
||||
;; Unify two types; raise on failure. The match.sx unify returns nil on
|
||||
;; failure so we wrap it for clearer errors.
|
||||
(define ocaml-hm-unify
|
||||
(fn (t1 t2 subst)
|
||||
(let ((s2 (unify t1 t2 subst)))
|
||||
(cond
|
||||
((= s2 nil)
|
||||
(error (str "ocaml-infer: cannot unify " t1 " with " t2)))
|
||||
(else s2)))))
|
||||
|
||||
;; Look up name; instantiate scheme to a fresh monotype.
|
||||
(define ocaml-infer-var
|
||||
(fn (name env counter)
|
||||
(cond
|
||||
((has-key? env name)
|
||||
(let ((scheme (get env name)))
|
||||
(let ((t (hm-instantiate scheme counter)))
|
||||
{:subst {} :type t})))
|
||||
(else (error (str "ocaml-infer: unbound variable " name))))))
|
||||
|
||||
(define ocaml-infer-app
|
||||
(fn (fn-expr arg-expr env counter)
|
||||
(let ((r1 (ocaml-infer fn-expr env counter)))
|
||||
(let ((s1 (get r1 :subst)) (t1 (get r1 :type)))
|
||||
(let ((env2 (hm-apply-env s1 env)))
|
||||
(let ((r2 (ocaml-infer arg-expr env2 counter)))
|
||||
(let ((s2 (get r2 :subst)) (t2 (get r2 :type)))
|
||||
(let ((tv (hm-fresh-tv counter)))
|
||||
(let ((s3 (ocaml-hm-unify
|
||||
(hm-apply s2 t1)
|
||||
(hm-arrow t2 tv)
|
||||
(hm-compose s2 s1))))
|
||||
{:subst s3 :type (hm-apply s3 tv)})))))))))
|
||||
|
||||
(define ocaml-infer-fun
|
||||
(fn (params body env counter)
|
||||
(cond
|
||||
((= (len params) 0)
|
||||
(error "ocaml-infer: fun without params"))
|
||||
((= (len params) 1)
|
||||
(let ((tv (hm-fresh-tv counter)))
|
||||
(let ((env2 (assoc env (first params) (hm-monotype tv))))
|
||||
(let ((r (ocaml-infer body env2 counter)))
|
||||
(let ((s (get r :subst)) (t-body (get r :type)))
|
||||
{:subst s
|
||||
:type (hm-arrow (hm-apply s tv) t-body)})))))
|
||||
(else
|
||||
;; Curry: fun x y -> e ≡ fun x -> fun y -> e
|
||||
(let ((tv (hm-fresh-tv counter)))
|
||||
(let ((env2 (assoc env (first params) (hm-monotype tv))))
|
||||
(let ((r (ocaml-infer-fun (rest params) body env2 counter)))
|
||||
(let ((s (get r :subst)) (t-rest (get r :type)))
|
||||
{:subst s
|
||||
:type (hm-arrow (hm-apply s tv) t-rest)}))))))))
|
||||
|
||||
(define ocaml-infer-let
|
||||
(fn (name params rhs body env counter)
|
||||
(let ((rhs-expr (cond
|
||||
((= (len params) 0) rhs)
|
||||
(else (list :fun params rhs)))))
|
||||
(let ((r1 (ocaml-infer rhs-expr env counter)))
|
||||
(let ((s1 (get r1 :subst)) (t1 (get r1 :type)))
|
||||
(let ((env2 (hm-apply-env s1 env)))
|
||||
(let ((scheme (hm-generalize t1 env2)))
|
||||
(let ((env3 (assoc env2 name scheme)))
|
||||
(let ((r2 (ocaml-infer body env3 counter)))
|
||||
(let ((s2 (get r2 :subst)) (t2 (get r2 :type)))
|
||||
{:subst (hm-compose s2 s1) :type t2}))))))))))
|
||||
|
||||
(define ocaml-infer-if
|
||||
(fn (c-ast t-ast e-ast env counter)
|
||||
(let ((rc (ocaml-infer c-ast env counter)))
|
||||
(let ((sc (get rc :subst)) (tc (get rc :type)))
|
||||
(let ((sc2 (ocaml-hm-unify tc (hm-bool) sc)))
|
||||
(let ((env2 (hm-apply-env sc2 env)))
|
||||
(let ((rt (ocaml-infer t-ast env2 counter)))
|
||||
(let ((st (get rt :subst)) (tt (get rt :type)))
|
||||
(let ((env3 (hm-apply-env st env2)))
|
||||
(let ((re (ocaml-infer e-ast env3 counter)))
|
||||
(let ((se (get re :subst)) (te (get re :type)))
|
||||
(let ((sf (ocaml-hm-unify
|
||||
(hm-apply se tt)
|
||||
te
|
||||
(hm-compose se (hm-compose st sc2)))))
|
||||
{:subst sf
|
||||
:type (hm-apply sf te)}))))))))))))
|
||||
|
||||
(set! ocaml-infer
|
||||
(fn (expr env counter)
|
||||
(let ((tag (nth expr 0)))
|
||||
(cond
|
||||
((= tag "int") {:subst {} :type (hm-int)})
|
||||
((= tag "float") {:subst {} :type (hm-int)}) ;; treat float as int for now
|
||||
((= tag "string") {:subst {} :type (hm-string)})
|
||||
((= tag "char") {:subst {} :type (hm-string)})
|
||||
((= tag "bool") {:subst {} :type (hm-bool)})
|
||||
((= tag "unit") {:subst {} :type (hm-con "Unit" (list))})
|
||||
((= tag "var") (ocaml-infer-var (nth expr 1) env counter))
|
||||
((= tag "fun") (ocaml-infer-fun (nth expr 1) (nth expr 2) env counter))
|
||||
((= tag "app") (ocaml-infer-app (nth expr 1) (nth expr 2) env counter))
|
||||
((= tag "let") (ocaml-infer-let (nth expr 1) (nth expr 2)
|
||||
(nth expr 3) (nth expr 4) env counter))
|
||||
((= tag "if") (ocaml-infer-if (nth expr 1) (nth expr 2)
|
||||
(nth expr 3) env counter))
|
||||
((= tag "neg")
|
||||
(let ((r (ocaml-infer (nth expr 1) env counter)))
|
||||
(let ((s (get r :subst)) (t (get r :type)))
|
||||
(let ((s2 (ocaml-hm-unify t (hm-int) s)))
|
||||
{:subst s2 :type (hm-int)}))))
|
||||
((= tag "not")
|
||||
(let ((r (ocaml-infer (nth expr 1) env counter)))
|
||||
(let ((s (get r :subst)) (t (get r :type)))
|
||||
(let ((s2 (ocaml-hm-unify t (hm-bool) s)))
|
||||
{:subst s2 :type (hm-bool)}))))
|
||||
((= tag "op")
|
||||
;; Treat (:op OP L R) as (:app (:app (:var OP) L) R) — same rule.
|
||||
(ocaml-infer
|
||||
(list :app (list :app (list :var (nth expr 1)) (nth expr 2)) (nth expr 3))
|
||||
env counter))
|
||||
(else (error (str "ocaml-infer: unsupported tag " tag)))))))
|
||||
|
||||
;; Top-level convenience: parse + infer + render the type.
|
||||
(define ocaml-type-of
|
||||
(fn (src)
|
||||
(let ((expr (ocaml-parse src))
|
||||
(env (ocaml-hm-builtin-env))
|
||||
(counter (ocaml-hm-counter)))
|
||||
(let ((r (ocaml-infer expr env counter)))
|
||||
(ocaml-hm-format-type (hm-apply (get r :subst) (get r :type)))))))
|
||||
|
||||
;; Pretty-print a type as an OCaml-style string for testing. Only handles
|
||||
;; the constructors we use: Int / Bool / String / Unit / -> / type-vars.
|
||||
(define ocaml-hm-format-type
|
||||
(fn (t)
|
||||
(cond
|
||||
((is-var? t) (str "'" (var-name t)))
|
||||
((is-ctor? t)
|
||||
(let ((head (ctor-head t)) (args (ctor-args t)))
|
||||
(cond
|
||||
((= head "->")
|
||||
(let ((a (nth args 0)) (b (nth args 1)))
|
||||
(str
|
||||
(cond
|
||||
((and (is-ctor? a) (= (ctor-head a) "->"))
|
||||
(str "(" (ocaml-hm-format-type a) ")"))
|
||||
(else (ocaml-hm-format-type a)))
|
||||
" -> " (ocaml-hm-format-type b))))
|
||||
(else head))))
|
||||
(else (str t)))))
|
||||
@@ -30,10 +30,13 @@ cat > "$TMPFILE" << 'EPOCHS'
|
||||
(load "lib/guest/lex.sx")
|
||||
(load "lib/guest/prefix.sx")
|
||||
(load "lib/guest/pratt.sx")
|
||||
(load "lib/guest/match.sx")
|
||||
(load "lib/guest/hm.sx")
|
||||
(load "lib/ocaml/tokenizer.sx")
|
||||
(load "lib/ocaml/parser.sx")
|
||||
(load "lib/ocaml/eval.sx")
|
||||
(load "lib/ocaml/runtime.sx")
|
||||
(load "lib/ocaml/infer.sx")
|
||||
(load "lib/ocaml/tests/tokenize.sx")
|
||||
(eval "(ocaml-load-stdlib!)")
|
||||
|
||||
@@ -639,6 +642,36 @@ cat > "$TMPFILE" << 'EPOCHS'
|
||||
(epoch 852)
|
||||
(eval "(ocaml-run-program \"let x = 1 and y = 2;; x + y\")")
|
||||
|
||||
;; ── Phase 5: Hindley-Milner type inference ────────────────────
|
||||
(epoch 900)
|
||||
(eval "(ocaml-type-of \"42\")")
|
||||
(epoch 901)
|
||||
(eval "(ocaml-type-of \"true\")")
|
||||
(epoch 902)
|
||||
(eval "(ocaml-type-of \"\\\"hi\\\"\")")
|
||||
(epoch 903)
|
||||
(eval "(ocaml-type-of \"1 + 2\")")
|
||||
(epoch 904)
|
||||
(eval "(ocaml-type-of \"fun x -> x + 1\")")
|
||||
(epoch 905)
|
||||
(eval "(ocaml-type-of \"fun x -> x\")")
|
||||
(epoch 906)
|
||||
(eval "(ocaml-type-of \"fun x y -> x + y\")")
|
||||
(epoch 907)
|
||||
(eval "(ocaml-type-of \"let f x = x + 1 in f 10\")")
|
||||
(epoch 908)
|
||||
(eval "(ocaml-type-of \"let id = fun x -> x in id 5\")")
|
||||
(epoch 909)
|
||||
(eval "(ocaml-type-of \"let id = fun x -> x in id true\")")
|
||||
(epoch 910)
|
||||
(eval "(ocaml-type-of \"if true then 1 else 2\")")
|
||||
(epoch 911)
|
||||
(eval "(ocaml-type-of \"fun f -> fun x -> f (f x)\")")
|
||||
(epoch 912)
|
||||
(eval "(ocaml-type-of \"fun b -> if b then 1 else 0\")")
|
||||
(epoch 913)
|
||||
(eval "(ocaml-type-of \"not true\")")
|
||||
|
||||
EPOCHS
|
||||
|
||||
OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
|
||||
@@ -1016,6 +1049,22 @@ check 850 "even 10 (mutual rec)" 'true'
|
||||
check 851 "odd 7 (mutual rec)" 'true'
|
||||
check 852 "let x = 1 and y = 2" '3'
|
||||
|
||||
# ── Phase 5: Hindley-Milner type inference ────────────────────
|
||||
check 900 "type 42 = Int" '"Int"'
|
||||
check 901 "type true = Bool" '"Bool"'
|
||||
check 902 'type string lit' '"String"'
|
||||
check 903 "type 1+2 = Int" '"Int"'
|
||||
check 904 "type fun x->x+1 = Int->Int" '"Int -> Int"'
|
||||
check 905 "type fun x->x = poly" ' -> '
|
||||
check 906 "type fun x y->x+y" '"Int -> Int -> Int"'
|
||||
check 907 "type let f x=x+1 in f 10" '"Int"'
|
||||
check 908 "type let id; id 5" '"Int"'
|
||||
check 909 "type let id; id true" '"Bool"'
|
||||
check 910 "type if/then/else" '"Int"'
|
||||
check 911 "type twice" ' -> '
|
||||
check 912 "type bool branch" '"Bool -> Int"'
|
||||
check 913 "type not true" '"Bool"'
|
||||
|
||||
TOTAL=$((PASS + FAIL))
|
||||
if [ $FAIL -eq 0 ]; then
|
||||
echo "ok $PASS/$TOTAL OCaml-on-SX tests passed"
|
||||
|
||||
@@ -204,11 +204,14 @@ SX CEK evaluator (both JS and OCaml hosts)
|
||||
|
||||
### Phase 5 — Hindley-Milner type inference
|
||||
|
||||
- [ ] Algorithm W: `gen`/`inst`, `unify`, `infer-expr`, `infer-decl`.
|
||||
- [ ] Type variables: `'a`, `'b`; unification with occur-check.
|
||||
- [ ] Let-polymorphism: generalise at let-bindings.
|
||||
- [~] Algorithm W: `gen`/`inst` from `lib/guest/hm.sx`, `unify` from
|
||||
`lib/guest/match.sx`, `infer-expr` written here. Covers atoms, var,
|
||||
lambda, app, let, if, op, neg, not. _(Pending: tuples, lists,
|
||||
pattern matching, let-rec, modules.)_
|
||||
- [x] Type variables: `'a`, `'b`; unification with occur-check (kit).
|
||||
- [x] Let-polymorphism: generalise at let-bindings (kit `hm-generalize`).
|
||||
- [ ] ADT types: `type 'a option = None | Some of 'a`.
|
||||
- [ ] Function types, tuple types, record types.
|
||||
- [~] Function types `T1 -> T2` work; tuples/records pending.
|
||||
- [ ] Type signatures: `val f : int -> int` — verify against inferred type.
|
||||
- [ ] Module type checking: seal against `sig` (Phase 4 stubs become real checks).
|
||||
- [ ] Error reporting: position-tagged errors with expected vs actual types.
|
||||
@@ -330,6 +333,17 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means:
|
||||
|
||||
_Newest first._
|
||||
|
||||
- 2026-05-08 Phase 5 — Hindley-Milner type inference, paired-sequencing
|
||||
consumer of `lib/guest/hm.sx` (algebra) and `lib/guest/match.sx`
|
||||
(unify). `lib/ocaml/infer.sx` ships Algorithm W rules for OCaml AST:
|
||||
atoms, var (instantiate), fun (auto-curry through fresh-tv), app
|
||||
(unify against arrow), let (generalize over rhs), if (unify branches),
|
||||
neg/not, op (treat as app of builtin). Builtin env types `+`/`-`/etc.
|
||||
as monomorphic int->int->int and `=`/`<>` as polymorphic 'a->'a->bool.
|
||||
Tested: literals, +1, identity polymorphism `'a -> 'a`, let-poly so
|
||||
`let id = fun x -> x in id true : Bool`, `twice` infers
|
||||
`('a -> 'a) -> 'a -> 'a`. Mandate satisfied: OCaml-on-SX is the
|
||||
deferred second consumer for lib-guest Step 8. 265/265 (+14).
|
||||
- 2026-05-08 Phase 2 — `let ... and ...` mutual recursion at top level.
|
||||
Parser collects all bindings into a list, emitting `(:def-rec-mut)` or
|
||||
`(:def-mut)` when there are 2+. Eval allocates a placeholder cell per
|
||||
|
||||
Reference in New Issue
Block a user