diff --git a/lib/ocaml/infer.sx b/lib/ocaml/infer.sx new file mode 100644 index 00000000..86cd4b39 --- /dev/null +++ b/lib/ocaml/infer.sx @@ -0,0 +1,209 @@ +;; lib/ocaml/infer.sx — Algorithm W type inference for OCaml-on-SX. +;; +;; Consumes lib/guest/hm.sx (algebra) and lib/guest/match.sx (unify) per +;; the Phase 5 sequencing. The kit ships fresh-tv, generalize, +;; instantiate, and substitution composition; this file assembles the +;; lambda / app / let / if rules of Algorithm W against the OCaml AST. +;; +;; Coverage in this slice (atoms + core forms): +;; :int :float :string :char :bool :unit :var :fun :app :let :if +;; :op (with builtin signatures for +, -, *, /, mod, comparisons, &&, ||) +;; +;; Out of scope: pattern matching, tuples, lists (need product/list types +;; first), records, modules, ADTs, let-rec. +;; +;; Inference state: +;; env — dict: name → scheme +;; counter — one-element list (mutable cell) used by hm-fresh-tv +;; +;; Returned value: {:subst S :type T}. + +(define ocaml-hm-counter (fn () (list 0))) + +(define ocaml-hm-empty-subst (fn () {})) + +(define ocaml-hm-builtin-env + (fn () + (let ((int-int-int (hm-arrow (hm-int) (hm-arrow (hm-int) (hm-int)))) + (int-int-bool (hm-arrow (hm-int) (hm-arrow (hm-int) (hm-bool)))) + (bool-bool-bool (hm-arrow (hm-bool) (hm-arrow (hm-bool) (hm-bool)))) + (str-str-str (hm-arrow (hm-string) (hm-arrow (hm-string) (hm-string)))) + (any-any-bool + (let ((a (hm-tv "a"))) + (hm-scheme (list "a") + (hm-arrow a (hm-arrow a (hm-bool)))))) + (a->a + (let ((a (hm-tv "a"))) + (hm-scheme (list "a") (hm-arrow a a))))) + {"+" (hm-monotype int-int-int) + "-" (hm-monotype int-int-int) + "*" (hm-monotype int-int-int) + "/" (hm-monotype int-int-int) + "mod" (hm-monotype int-int-int) + "%" (hm-monotype int-int-int) + "**" (hm-monotype int-int-int) + "<" (hm-monotype int-int-bool) + ">" (hm-monotype int-int-bool) + "<=" (hm-monotype int-int-bool) + ">=" (hm-monotype int-int-bool) + "=" any-any-bool + "<>" any-any-bool + "&&" (hm-monotype bool-bool-bool) + "||" (hm-monotype bool-bool-bool) + "^" (hm-monotype str-str-str) + "not" (hm-monotype (hm-arrow (hm-bool) (hm-bool))) + "succ" (hm-monotype (hm-arrow (hm-int) (hm-int))) + "pred" (hm-monotype (hm-arrow (hm-int) (hm-int))) + "abs" (hm-monotype (hm-arrow (hm-int) (hm-int)))}))) + +(define ocaml-infer (fn (expr env counter) nil)) + +;; Unify two types; raise on failure. The match.sx unify returns nil on +;; failure so we wrap it for clearer errors. +(define ocaml-hm-unify + (fn (t1 t2 subst) + (let ((s2 (unify t1 t2 subst))) + (cond + ((= s2 nil) + (error (str "ocaml-infer: cannot unify " t1 " with " t2))) + (else s2))))) + +;; Look up name; instantiate scheme to a fresh monotype. +(define ocaml-infer-var + (fn (name env counter) + (cond + ((has-key? env name) + (let ((scheme (get env name))) + (let ((t (hm-instantiate scheme counter))) + {:subst {} :type t}))) + (else (error (str "ocaml-infer: unbound variable " name)))))) + +(define ocaml-infer-app + (fn (fn-expr arg-expr env counter) + (let ((r1 (ocaml-infer fn-expr env counter))) + (let ((s1 (get r1 :subst)) (t1 (get r1 :type))) + (let ((env2 (hm-apply-env s1 env))) + (let ((r2 (ocaml-infer arg-expr env2 counter))) + (let ((s2 (get r2 :subst)) (t2 (get r2 :type))) + (let ((tv (hm-fresh-tv counter))) + (let ((s3 (ocaml-hm-unify + (hm-apply s2 t1) + (hm-arrow t2 tv) + (hm-compose s2 s1)))) + {:subst s3 :type (hm-apply s3 tv)}))))))))) + +(define ocaml-infer-fun + (fn (params body env counter) + (cond + ((= (len params) 0) + (error "ocaml-infer: fun without params")) + ((= (len params) 1) + (let ((tv (hm-fresh-tv counter))) + (let ((env2 (assoc env (first params) (hm-monotype tv)))) + (let ((r (ocaml-infer body env2 counter))) + (let ((s (get r :subst)) (t-body (get r :type))) + {:subst s + :type (hm-arrow (hm-apply s tv) t-body)}))))) + (else + ;; Curry: fun x y -> e ≡ fun x -> fun y -> e + (let ((tv (hm-fresh-tv counter))) + (let ((env2 (assoc env (first params) (hm-monotype tv)))) + (let ((r (ocaml-infer-fun (rest params) body env2 counter))) + (let ((s (get r :subst)) (t-rest (get r :type))) + {:subst s + :type (hm-arrow (hm-apply s tv) t-rest)})))))))) + +(define ocaml-infer-let + (fn (name params rhs body env counter) + (let ((rhs-expr (cond + ((= (len params) 0) rhs) + (else (list :fun params rhs))))) + (let ((r1 (ocaml-infer rhs-expr env counter))) + (let ((s1 (get r1 :subst)) (t1 (get r1 :type))) + (let ((env2 (hm-apply-env s1 env))) + (let ((scheme (hm-generalize t1 env2))) + (let ((env3 (assoc env2 name scheme))) + (let ((r2 (ocaml-infer body env3 counter))) + (let ((s2 (get r2 :subst)) (t2 (get r2 :type))) + {:subst (hm-compose s2 s1) :type t2})))))))))) + +(define ocaml-infer-if + (fn (c-ast t-ast e-ast env counter) + (let ((rc (ocaml-infer c-ast env counter))) + (let ((sc (get rc :subst)) (tc (get rc :type))) + (let ((sc2 (ocaml-hm-unify tc (hm-bool) sc))) + (let ((env2 (hm-apply-env sc2 env))) + (let ((rt (ocaml-infer t-ast env2 counter))) + (let ((st (get rt :subst)) (tt (get rt :type))) + (let ((env3 (hm-apply-env st env2))) + (let ((re (ocaml-infer e-ast env3 counter))) + (let ((se (get re :subst)) (te (get re :type))) + (let ((sf (ocaml-hm-unify + (hm-apply se tt) + te + (hm-compose se (hm-compose st sc2))))) + {:subst sf + :type (hm-apply sf te)})))))))))))) + +(set! ocaml-infer + (fn (expr env counter) + (let ((tag (nth expr 0))) + (cond + ((= tag "int") {:subst {} :type (hm-int)}) + ((= tag "float") {:subst {} :type (hm-int)}) ;; treat float as int for now + ((= tag "string") {:subst {} :type (hm-string)}) + ((= tag "char") {:subst {} :type (hm-string)}) + ((= tag "bool") {:subst {} :type (hm-bool)}) + ((= tag "unit") {:subst {} :type (hm-con "Unit" (list))}) + ((= tag "var") (ocaml-infer-var (nth expr 1) env counter)) + ((= tag "fun") (ocaml-infer-fun (nth expr 1) (nth expr 2) env counter)) + ((= tag "app") (ocaml-infer-app (nth expr 1) (nth expr 2) env counter)) + ((= tag "let") (ocaml-infer-let (nth expr 1) (nth expr 2) + (nth expr 3) (nth expr 4) env counter)) + ((= tag "if") (ocaml-infer-if (nth expr 1) (nth expr 2) + (nth expr 3) env counter)) + ((= tag "neg") + (let ((r (ocaml-infer (nth expr 1) env counter))) + (let ((s (get r :subst)) (t (get r :type))) + (let ((s2 (ocaml-hm-unify t (hm-int) s))) + {:subst s2 :type (hm-int)})))) + ((= tag "not") + (let ((r (ocaml-infer (nth expr 1) env counter))) + (let ((s (get r :subst)) (t (get r :type))) + (let ((s2 (ocaml-hm-unify t (hm-bool) s))) + {:subst s2 :type (hm-bool)})))) + ((= tag "op") + ;; Treat (:op OP L R) as (:app (:app (:var OP) L) R) — same rule. + (ocaml-infer + (list :app (list :app (list :var (nth expr 1)) (nth expr 2)) (nth expr 3)) + env counter)) + (else (error (str "ocaml-infer: unsupported tag " tag))))))) + +;; Top-level convenience: parse + infer + render the type. +(define ocaml-type-of + (fn (src) + (let ((expr (ocaml-parse src)) + (env (ocaml-hm-builtin-env)) + (counter (ocaml-hm-counter))) + (let ((r (ocaml-infer expr env counter))) + (ocaml-hm-format-type (hm-apply (get r :subst) (get r :type))))))) + +;; Pretty-print a type as an OCaml-style string for testing. Only handles +;; the constructors we use: Int / Bool / String / Unit / -> / type-vars. +(define ocaml-hm-format-type + (fn (t) + (cond + ((is-var? t) (str "'" (var-name t))) + ((is-ctor? t) + (let ((head (ctor-head t)) (args (ctor-args t))) + (cond + ((= head "->") + (let ((a (nth args 0)) (b (nth args 1))) + (str + (cond + ((and (is-ctor? a) (= (ctor-head a) "->")) + (str "(" (ocaml-hm-format-type a) ")")) + (else (ocaml-hm-format-type a))) + " -> " (ocaml-hm-format-type b)))) + (else head)))) + (else (str t))))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 0be261ea..0b8da7cf 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -30,10 +30,13 @@ cat > "$TMPFILE" << 'EPOCHS' (load "lib/guest/lex.sx") (load "lib/guest/prefix.sx") (load "lib/guest/pratt.sx") +(load "lib/guest/match.sx") +(load "lib/guest/hm.sx") (load "lib/ocaml/tokenizer.sx") (load "lib/ocaml/parser.sx") (load "lib/ocaml/eval.sx") (load "lib/ocaml/runtime.sx") +(load "lib/ocaml/infer.sx") (load "lib/ocaml/tests/tokenize.sx") (eval "(ocaml-load-stdlib!)") @@ -639,6 +642,36 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 852) (eval "(ocaml-run-program \"let x = 1 and y = 2;; x + y\")") +;; ── Phase 5: Hindley-Milner type inference ──────────────────── +(epoch 900) +(eval "(ocaml-type-of \"42\")") +(epoch 901) +(eval "(ocaml-type-of \"true\")") +(epoch 902) +(eval "(ocaml-type-of \"\\\"hi\\\"\")") +(epoch 903) +(eval "(ocaml-type-of \"1 + 2\")") +(epoch 904) +(eval "(ocaml-type-of \"fun x -> x + 1\")") +(epoch 905) +(eval "(ocaml-type-of \"fun x -> x\")") +(epoch 906) +(eval "(ocaml-type-of \"fun x y -> x + y\")") +(epoch 907) +(eval "(ocaml-type-of \"let f x = x + 1 in f 10\")") +(epoch 908) +(eval "(ocaml-type-of \"let id = fun x -> x in id 5\")") +(epoch 909) +(eval "(ocaml-type-of \"let id = fun x -> x in id true\")") +(epoch 910) +(eval "(ocaml-type-of \"if true then 1 else 2\")") +(epoch 911) +(eval "(ocaml-type-of \"fun f -> fun x -> f (f x)\")") +(epoch 912) +(eval "(ocaml-type-of \"fun b -> if b then 1 else 0\")") +(epoch 913) +(eval "(ocaml-type-of \"not true\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1016,6 +1049,22 @@ check 850 "even 10 (mutual rec)" 'true' check 851 "odd 7 (mutual rec)" 'true' check 852 "let x = 1 and y = 2" '3' +# ── Phase 5: Hindley-Milner type inference ──────────────────── +check 900 "type 42 = Int" '"Int"' +check 901 "type true = Bool" '"Bool"' +check 902 'type string lit' '"String"' +check 903 "type 1+2 = Int" '"Int"' +check 904 "type fun x->x+1 = Int->Int" '"Int -> Int"' +check 905 "type fun x->x = poly" ' -> ' +check 906 "type fun x y->x+y" '"Int -> Int -> Int"' +check 907 "type let f x=x+1 in f 10" '"Int"' +check 908 "type let id; id 5" '"Int"' +check 909 "type let id; id true" '"Bool"' +check 910 "type if/then/else" '"Int"' +check 911 "type twice" ' -> ' +check 912 "type bool branch" '"Bool -> Int"' +check 913 "type not true" '"Bool"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 26694a4e..06e288d5 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -204,11 +204,14 @@ SX CEK evaluator (both JS and OCaml hosts) ### Phase 5 — Hindley-Milner type inference -- [ ] Algorithm W: `gen`/`inst`, `unify`, `infer-expr`, `infer-decl`. -- [ ] Type variables: `'a`, `'b`; unification with occur-check. -- [ ] Let-polymorphism: generalise at let-bindings. +- [~] Algorithm W: `gen`/`inst` from `lib/guest/hm.sx`, `unify` from + `lib/guest/match.sx`, `infer-expr` written here. Covers atoms, var, + lambda, app, let, if, op, neg, not. _(Pending: tuples, lists, + pattern matching, let-rec, modules.)_ +- [x] Type variables: `'a`, `'b`; unification with occur-check (kit). +- [x] Let-polymorphism: generalise at let-bindings (kit `hm-generalize`). - [ ] ADT types: `type 'a option = None | Some of 'a`. -- [ ] Function types, tuple types, record types. +- [~] Function types `T1 -> T2` work; tuples/records pending. - [ ] Type signatures: `val f : int -> int` — verify against inferred type. - [ ] Module type checking: seal against `sig` (Phase 4 stubs become real checks). - [ ] Error reporting: position-tagged errors with expected vs actual types. @@ -330,6 +333,17 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5 — Hindley-Milner type inference, paired-sequencing + consumer of `lib/guest/hm.sx` (algebra) and `lib/guest/match.sx` + (unify). `lib/ocaml/infer.sx` ships Algorithm W rules for OCaml AST: + atoms, var (instantiate), fun (auto-curry through fresh-tv), app + (unify against arrow), let (generalize over rhs), if (unify branches), + neg/not, op (treat as app of builtin). Builtin env types `+`/`-`/etc. + as monomorphic int->int->int and `=`/`<>` as polymorphic 'a->'a->bool. + Tested: literals, +1, identity polymorphism `'a -> 'a`, let-poly so + `let id = fun x -> x in id true : Bool`, `twice` infers + `('a -> 'a) -> 'a -> 'a`. Mandate satisfied: OCaml-on-SX is the + deferred second consumer for lib-guest Step 8. 265/265 (+14). - 2026-05-08 Phase 2 — `let ... and ...` mutual recursion at top level. Parser collects all bindings into a list, emitting `(:def-rec-mut)` or `(:def-mut)` when there are 2+. Eval allocates a placeholder cell per