ocaml: phase 5 parse ctor arg types in user type-defs (+3 tests, 409 total)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 36s

ocaml-hm-parse-type-src recognises primitive type names (int/bool/
string/float/unit), tyvars 'a, and simple parametric T list / T option.
Replaces the previous int-by-default placeholder in
ocaml-hm-register-type-def!.

So 'type tag = TStr of string | TInt of int' correctly registers
TStr : string -> tag and TInt : int -> tag. Pattern-match on tag
gives proper field types in the body. Multi-arg / function types
still fall back to a fresh tv.
This commit is contained in:
2026-05-08 15:43:16 +00:00
parent d61ee088c5
commit 4bca2cacff
3 changed files with 58 additions and 12 deletions

View File

@@ -412,11 +412,35 @@
(set-nth! ocaml-hm-ctors 0
(merge (nth ocaml-hm-ctors 0) (dict name scheme)))))
;; Process a :type-def AST. For each ctor, build its scheme:
;; nullary `A` → scheme [] (con NAME [param-tvs...])
;; ctor `B of int` → scheme [] (int -> (con NAME [...]))
;; Argument types are ignored for now (they're raw source strings) — we
;; assume `int`. A future iteration parses arg types properly.
;; Parse a simple type source into an HM type. Handles primitive type
;; names, type variables `'a`, parametric `'a list`, `T1 * T2`, and
;; function `T1 -> T2`. Unknown tokens default to a fresh tv so the
;; result is at worst polymorphic, never wrong.
(define ocaml-hm-parse-type-src
(fn (src)
(let ((s (trim src)))
(cond
((= s "int") (hm-int))
((= s "bool") (hm-bool))
((= s "string") (hm-string))
((= s "float") (ocaml-hm-float))
((= s "unit") (hm-con "Unit" (list)))
((and (> (len s) 1) (= (nth s 0) "'"))
(hm-tv (slice s 1 (len s))))
;; "T list" / "T option" — split on space, treat last as ctor.
(else
(let ((parts (filter (fn (p) (not (= p ""))) (split s " "))))
(cond
((= (len parts) 2)
(let ((arg (ocaml-hm-parse-type-src (first parts)))
(head (nth parts 1)))
(hm-con head (list arg))))
(else
;; Unknown: emit a fresh tv so unification stays sound.
(hm-tv (str "_unknown"))))))))))
;; Process a :type-def AST. For each ctor, build its scheme. Multi-arg
;; ctors are a list of types — we model that as a tuple arg.
(define ocaml-hm-register-type-def!
(fn (type-def)
(let ((name (nth type-def 1))
@@ -434,11 +458,12 @@
(ocaml-hm-ctor-register! cname
(hm-scheme params self-type)))
(else
;; Single-arg ctor with arg type `int` (placeholder).
;; Multi-arg or other-typed ctors fall back to int.
(ocaml-hm-ctor-register! cname
(hm-scheme params
(hm-arrow (hm-int) self-type))))))))
;; ARG-SRCS is a list of source strings, often a
;; single combined string `T1 * T2 * ...`. Parse.
(let ((arg-type (ocaml-hm-parse-type-src (first arg-srcs))))
(ocaml-hm-ctor-register! cname
(hm-scheme params
(hm-arrow arg-type self-type)))))))))
(for-each register-ctor ctors)))))))
(set! ocaml-infer

View File

@@ -1004,6 +1004,14 @@ cat > "$TMPFILE" << 'EPOCHS'
(epoch 3005)
(eval "(ocaml-run \"String.index_of \\\"hello\\\" \\\"ll\\\"\")")
;; ── HM with parsed ctor arg types ──────────────────────────────
(epoch 3100)
(eval "(ocaml-type-of-program \"type shape = Circle of int | Square of int;; let area s = match s with | Circle r -> r * r | Square s -> s * s;; area\")")
(epoch 3101)
(eval "(ocaml-type-of-program \"type tag = TStr of string | TInt of int;; TStr \\\"hi\\\"\")")
(epoch 3102)
(eval "(ocaml-type-of-program \"type t = A of bool | B of float;; A true\")")
EPOCHS
OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
@@ -1588,6 +1596,11 @@ check 3003 "String.split_on_char" '("a" "b" "c")'
check 3004 "String.replace_all" '"herro"'
check 3005 "String.index_of" '2'
# ── HM with parsed ctor arg types ──────────────────────────────
check 3100 "shape -> Int" '"shape -> Int"'
check 3101 "TStr 'hi' : tag" '"tag"'
check 3102 "A true : t" '"t"'
TOTAL=$((PASS + FAIL))
if [ $FAIL -eq 0 ]; then
echo "ok $PASS/$TOTAL OCaml-on-SX tests passed"

View File

@@ -236,8 +236,10 @@ SX CEK evaluator (both JS and OCaml hosts)
`ocaml-hm-register-type-def!` registers user types from `:type-def`.
`ocaml-type-of-program` threads decls through the env, registering
types and binding `let` schemes. `:con NAME` / `:pcon NAME …`
instantiate from the registry. _(Caveat: ctor arg types currently
default to `int` — proper type parsing pending.)_
instantiate from the registry. Ctor arg types parsed via
`ocaml-hm-parse-type-src` — handles primitives (`int`/`bool`/
`string`/`float`/`unit`), tyvars `'a`, simple parametric `T list`/
`T option`. Multi-arg/complex types fall back to a fresh tv.
- [~] Function types `T1 -> T2` work; tuples (`'a * 'b`) and lists
(`'a list`) supported. Records pending.
- [ ] Type signatures: `val f : int -> int` — verify against inferred type.
@@ -377,6 +379,12 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means:
_Newest first._
- 2026-05-08 Phase 5 — parse simple type sources in user type-defs
(+3 tests, 409 total). `ocaml-hm-parse-type-src` recognises
primitive type names, tyvars `'a`, and `T list`/`T option`-style
parametric types. Replaces the old "default to Int" placeholder so
`type t = TStr of string` correctly registers `TStr : string -> t`.
Multi-arg / function types still fall back to a fresh tv.
- 2026-05-08 Phase 6 — String extensions: ends_with/contains/trim/
split_on_char/replace_all/index_of (+6 tests, 406 total). Wraps host
primitives via `_string_*` builtins.