From 4bca2cacff0e66c015b4b7e4a08e1d3c9ddcc173 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 15:43:16 +0000 Subject: [PATCH] ocaml: phase 5 parse ctor arg types in user type-defs (+3 tests, 409 total) ocaml-hm-parse-type-src recognises primitive type names (int/bool/ string/float/unit), tyvars 'a, and simple parametric T list / T option. Replaces the previous int-by-default placeholder in ocaml-hm-register-type-def!. So 'type tag = TStr of string | TInt of int' correctly registers TStr : string -> tag and TInt : int -> tag. Pattern-match on tag gives proper field types in the body. Multi-arg / function types still fall back to a fresh tv. --- lib/ocaml/infer.sx | 45 ++++++++++++++++++++++++++++++++++---------- lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 12 ++++++++++-- 3 files changed, 58 insertions(+), 12 deletions(-) diff --git a/lib/ocaml/infer.sx b/lib/ocaml/infer.sx index a37c5c59..84beebf2 100644 --- a/lib/ocaml/infer.sx +++ b/lib/ocaml/infer.sx @@ -412,11 +412,35 @@ (set-nth! ocaml-hm-ctors 0 (merge (nth ocaml-hm-ctors 0) (dict name scheme))))) -;; Process a :type-def AST. For each ctor, build its scheme: -;; nullary `A` → scheme [] (con NAME [param-tvs...]) -;; ctor `B of int` → scheme [] (int -> (con NAME [...])) -;; Argument types are ignored for now (they're raw source strings) — we -;; assume `int`. A future iteration parses arg types properly. +;; Parse a simple type source into an HM type. Handles primitive type +;; names, type variables `'a`, parametric `'a list`, `T1 * T2`, and +;; function `T1 -> T2`. Unknown tokens default to a fresh tv so the +;; result is at worst polymorphic, never wrong. +(define ocaml-hm-parse-type-src + (fn (src) + (let ((s (trim src))) + (cond + ((= s "int") (hm-int)) + ((= s "bool") (hm-bool)) + ((= s "string") (hm-string)) + ((= s "float") (ocaml-hm-float)) + ((= s "unit") (hm-con "Unit" (list))) + ((and (> (len s) 1) (= (nth s 0) "'")) + (hm-tv (slice s 1 (len s)))) + ;; "T list" / "T option" — split on space, treat last as ctor. + (else + (let ((parts (filter (fn (p) (not (= p ""))) (split s " ")))) + (cond + ((= (len parts) 2) + (let ((arg (ocaml-hm-parse-type-src (first parts))) + (head (nth parts 1))) + (hm-con head (list arg)))) + (else + ;; Unknown: emit a fresh tv so unification stays sound. + (hm-tv (str "_unknown")))))))))) + +;; Process a :type-def AST. For each ctor, build its scheme. Multi-arg +;; ctors are a list of types — we model that as a tuple arg. (define ocaml-hm-register-type-def! (fn (type-def) (let ((name (nth type-def 1)) @@ -434,11 +458,12 @@ (ocaml-hm-ctor-register! cname (hm-scheme params self-type))) (else - ;; Single-arg ctor with arg type `int` (placeholder). - ;; Multi-arg or other-typed ctors fall back to int. - (ocaml-hm-ctor-register! cname - (hm-scheme params - (hm-arrow (hm-int) self-type)))))))) + ;; ARG-SRCS is a list of source strings, often a + ;; single combined string `T1 * T2 * ...`. Parse. + (let ((arg-type (ocaml-hm-parse-type-src (first arg-srcs)))) + (ocaml-hm-ctor-register! cname + (hm-scheme params + (hm-arrow arg-type self-type))))))))) (for-each register-ctor ctors))))))) (set! ocaml-infer diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 43e2da40..edda595a 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1004,6 +1004,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 3005) (eval "(ocaml-run \"String.index_of \\\"hello\\\" \\\"ll\\\"\")") +;; ── HM with parsed ctor arg types ────────────────────────────── +(epoch 3100) +(eval "(ocaml-type-of-program \"type shape = Circle of int | Square of int;; let area s = match s with | Circle r -> r * r | Square s -> s * s;; area\")") +(epoch 3101) +(eval "(ocaml-type-of-program \"type tag = TStr of string | TInt of int;; TStr \\\"hi\\\"\")") +(epoch 3102) +(eval "(ocaml-type-of-program \"type t = A of bool | B of float;; A true\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1588,6 +1596,11 @@ check 3003 "String.split_on_char" '("a" "b" "c")' check 3004 "String.replace_all" '"herro"' check 3005 "String.index_of" '2' +# ── HM with parsed ctor arg types ────────────────────────────── +check 3100 "shape -> Int" '"shape -> Int"' +check 3101 "TStr 'hi' : tag" '"tag"' +check 3102 "A true : t" '"t"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 37288d34..32e5853f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -236,8 +236,10 @@ SX CEK evaluator (both JS and OCaml hosts) `ocaml-hm-register-type-def!` registers user types from `:type-def`. `ocaml-type-of-program` threads decls through the env, registering types and binding `let` schemes. `:con NAME` / `:pcon NAME …` - instantiate from the registry. _(Caveat: ctor arg types currently - default to `int` — proper type parsing pending.)_ + instantiate from the registry. Ctor arg types parsed via + `ocaml-hm-parse-type-src` — handles primitives (`int`/`bool`/ + `string`/`float`/`unit`), tyvars `'a`, simple parametric `T list`/ + `T option`. Multi-arg/complex types fall back to a fresh tv. - [~] Function types `T1 -> T2` work; tuples (`'a * 'b`) and lists (`'a list`) supported. Records pending. - [ ] Type signatures: `val f : int -> int` — verify against inferred type. @@ -377,6 +379,12 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5 — parse simple type sources in user type-defs + (+3 tests, 409 total). `ocaml-hm-parse-type-src` recognises + primitive type names, tyvars `'a`, and `T list`/`T option`-style + parametric types. Replaces the old "default to Int" placeholder so + `type t = TStr of string` correctly registers `TStr : string -> t`. + Multi-arg / function types still fall back to a fresh tv. - 2026-05-08 Phase 6 — String extensions: ends_with/contains/trim/ split_on_char/replace_all/index_of (+6 tests, 406 total). Wraps host primitives via `_string_*` builtins.