diff --git a/lib/ocaml/infer.sx b/lib/ocaml/infer.sx index 211c880f..c07bf1ef 100644 --- a/lib/ocaml/infer.sx +++ b/lib/ocaml/infer.sx @@ -228,8 +228,8 @@ (define ocaml-infer-pcon (fn (name arg-pats env counter) (cond - ((has-key? ocaml-hm-ctors name) - (let ((ctor-type (hm-instantiate (get ocaml-hm-ctors name) counter)) + ((ocaml-hm-ctor-has? name) + (let ((ctor-type (hm-instantiate (ocaml-hm-ctor-lookup name) counter)) (env-cur env) (subst {})) (let ((cur-type (list nil))) (begin @@ -388,21 +388,60 @@ {:subst subst :type (ocaml-hm-list (hm-apply subst elem-tv))})))))) -(define ocaml-hm-ctors (ocaml-hm-ctor-env)) +;; Mutable cell so user `type` declarations can extend the registry. +(define ocaml-hm-ctors (list (ocaml-hm-ctor-env))) + +(define ocaml-hm-ctor-lookup + (fn (name) (get (nth ocaml-hm-ctors 0) name))) + +(define ocaml-hm-ctor-has? + (fn (name) (has-key? (nth ocaml-hm-ctors 0) name))) + +(define ocaml-hm-ctor-register! + (fn (name scheme) + (set-nth! ocaml-hm-ctors 0 + (merge (nth ocaml-hm-ctors 0) (dict name scheme))))) + +;; Process a :type-def AST. For each ctor, build its scheme: +;; nullary `A` → scheme [] (con NAME [param-tvs...]) +;; ctor `B of int` → scheme [] (int -> (con NAME [...])) +;; Argument types are ignored for now (they're raw source strings) — we +;; assume `int`. A future iteration parses arg types properly. +(define ocaml-hm-register-type-def! + (fn (type-def) + (let ((name (nth type-def 1)) + (params (nth type-def 2)) + (ctors (nth type-def 3))) + (let ((param-tvs (map hm-tv params))) + (let ((self-type (hm-con name param-tvs))) + (begin + (define register-ctor + (fn (ctor) + (let ((cname (first ctor)) + (arg-srcs (rest ctor))) + (cond + ((= (len arg-srcs) 0) + (ocaml-hm-ctor-register! cname + (hm-scheme params self-type))) + (else + ;; Single-arg ctor with arg type `int` (placeholder). + ;; Multi-arg or other-typed ctors fall back to int. + (ocaml-hm-ctor-register! cname + (hm-scheme params + (hm-arrow (hm-int) self-type)))))))) + (for-each register-ctor ctors))))))) (set! ocaml-infer (fn (expr env counter) (let ((tag (nth expr 0))) (cond ((= tag "con") - ;; (:con NAME) — look up constructor type, instantiate fresh. (let ((name (nth expr 1))) (cond - ((has-key? ocaml-hm-ctors name) + ((ocaml-hm-ctor-has? name) {:subst {} - :type (hm-instantiate (get ocaml-hm-ctors name) counter)}) + :type (hm-instantiate (ocaml-hm-ctor-lookup name) counter)}) (else - ;; Unknown ctor — treat as a fresh polymorphic type. {:subst {} :type (hm-fresh-tv counter)})))) ((= tag "int") {:subst {} :type (hm-int)}) ((= tag "float") {:subst {} :type (hm-int)}) ;; treat float as int for now @@ -448,6 +487,61 @@ (let ((r (ocaml-infer expr env counter))) (ocaml-hm-format-type (hm-apply (get r :subst) (get r :type))))))) +;; Program-level type inference: process decls in order, registering +;; type-defs with the ctor registry, threading let-bindings into the +;; env, and returning the type of the last expression-level form. +(define ocaml-type-of-program + (fn (src) + (let ((prog (ocaml-parse-program src)) + (env (ocaml-hm-builtin-env)) + (counter (ocaml-hm-counter)) + (last-type (hm-tv "?"))) + (begin + (define run-decl + (fn (decl) + (let ((tag (nth decl 0))) + (cond + ((= tag "type-def") (ocaml-hm-register-type-def! decl)) + ((= tag "exception-def") nil) + ((= tag "def") + (let ((nm (nth decl 1)) (ps (nth decl 2)) (rh (nth decl 3))) + (let ((rhs-expr (cond + ((= (len ps) 0) rh) + (else (list :fun ps rh))))) + (let ((r (ocaml-infer rhs-expr env counter))) + (let ((s (get r :subst)) (t (get r :type))) + (let ((env2 (hm-apply-env s env))) + (let ((scheme (hm-generalize t env2))) + (begin + (set! env (assoc env2 nm scheme)) + (set! last-type t))))))))) + ((= tag "def-rec") + (let ((nm (nth decl 1)) (ps (nth decl 2)) (rh (nth decl 3))) + (let ((rec-tv (hm-fresh-tv counter))) + (let ((env-rec (assoc env nm (hm-monotype rec-tv))) + (rhs-expr (cond + ((= (len ps) 0) rh) + (else (list :fun ps rh))))) + (let ((r (ocaml-infer rhs-expr env-rec counter))) + (let ((s (get r :subst)) (t (get r :type))) + (let ((s2 (ocaml-hm-unify (hm-apply s rec-tv) t s))) + (let ((env2 (hm-apply-env s2 env))) + (let ((scheme (hm-generalize (hm-apply s2 t) env2))) + (begin + (set! env (assoc env2 nm scheme)) + (set! last-type t))))))))))) + ((= tag "expr") + (let ((r (ocaml-infer (nth decl 1) env counter))) + (set! last-type + (hm-apply (get r :subst) (get r :type))))) + (else nil))))) + (define loop + (fn (xs) + (when (not (= xs (list))) + (begin (run-decl (first xs)) (loop (rest xs)))))) + (loop (rest prog)) + (ocaml-hm-format-type last-type))))) + ;; Pretty-print a type as an OCaml-style string for testing. Only handles ;; the constructors we use: Int / Bool / String / Unit / -> / type-vars. (define ocaml-hm-format-type diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 7ebdec78..4a6ad7b0 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -886,6 +886,20 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 2005) (eval "(ocaml-type-of \"let rec sum lst = match lst with | [] -> 0 | h :: t -> h + sum t in sum [1; 2; 3]\")") +;; ── HM with user type declarations ───────────────────────────── +(epoch 2100) +(eval "(ocaml-type-of-program \"type color = Red | Green | Blue;; Red\")") +(epoch 2101) +(eval "(ocaml-type-of-program \"type shape = Circle of int | Square of int;; Circle 5\")") +(epoch 2102) +(eval "(ocaml-type-of-program \"type color = Red | Green;; let c = Red;; c\")") +(epoch 2103) +(eval "(ocaml-type-of-program \"type shape = Circle of int | Square of int;; let area s = match s with | Circle r -> r * r | Square s -> s * s;; area\")") +(epoch 2104) +(eval "(ocaml-type-of-program \"let x = 1;; let y = 2;; x + y\")") +(epoch 2105) +(eval "(ocaml-type-of-program \"let rec fact n = if n = 0 then 1 else n * fact (n - 1);; fact 5\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1401,6 +1415,14 @@ check 2003 "type 1::list" '"Int list"' check 2004 "type [1] @ [2;3]" '"Int list"' check 2005 "type sum" '"Int"' +# ── HM with user type-defs ────────────────────────────────────── +check 2100 "user type Red : color" '"color"' +check 2101 "user type Circle 5 : shape" '"shape"' +check 2102 "let c = Red; c" '"color"' +check 2103 "shape -> Int" '"shape -> Int"' +check 2104 "program x+y" '"Int"' +check 2105 "program fact 5" '"Int"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ec4b930b..328791eb 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -227,10 +227,12 @@ SX CEK evaluator (both JS and OCaml hosts) pattern matching, let-rec, modules.)_ - [x] Type variables: `'a`, `'b`; unification with occur-check (kit). - [x] Let-polymorphism: generalise at let-bindings (kit `hm-generalize`). -- [~] ADT types: `option`/`result` ctors hardcoded in - `ocaml-hm-ctor-env`; `:con NAME` and `:pcon NAME …` look up the - scheme and instantiate. User type-defs would extend the registry. - Format: `Int option`, `(Int, 'b) result`. +- [x] ADT types: `option`/`result` ctors seeded; + `ocaml-hm-register-type-def!` registers user types from `:type-def`. + `ocaml-type-of-program` threads decls through the env, registering + types and binding `let` schemes. `:con NAME` / `:pcon NAME …` + instantiate from the registry. _(Caveat: ctor arg types currently + default to `int` — proper type parsing pending.)_ - [~] Function types `T1 -> T2` work; tuples (`'a * 'b`) and lists (`'a list`) supported. Records pending. - [ ] Type signatures: `val f : int -> int` — verify against inferred type. @@ -368,6 +370,18 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5 — HM with user `type` declarations (+6 tests, 363 + total). `ocaml-hm-ctors` is now a mutable list cell; user type-defs + register their constructors via `ocaml-hm-register-type-def!`. New + `ocaml-type-of-program` processes top-level decls in order: type-def + registers ctors, def/def-rec generalize, exception-def is a no-op, + expr returns its inferred type. Examples: + `type color = Red | Green | Blue;; Red : color` + `type shape = Circle of int | Square of int;; let area s = match s + with | Circle r -> r * r | Square s -> s * s;; area : shape -> Int` + Caveat: ctor arg types parsed as raw source strings; the registry + defaults to `int` for any single-arg ctor. Proper type-source parsing + is pending. - 2026-05-08 Phase 5 — HM let-rec inference + `::`/`@` operator types (+6 tests, 357 total). `ocaml-infer-let-rec` pre-binds the function name to a fresh tv, infers rhs (which can recursively call name),