From 5bc7895ce0c74219767a57a670fc3ed5165aa0d1 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 13:08:51 +0000 Subject: [PATCH] ocaml: phase 5 HM let-rec + cons / append op types (+6 tests, 357 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ocaml-infer-let-rec pre-binds the function name to a fresh tv before inferring rhs (which may recursively call the name), unifies the inferred rhs type with the tv, generalizes, then infers body. Builtin env types :: : 'a -> 'a list -> 'a list and @ : 'a list -> 'a list -> 'a list — needed because :op compiles to (:app (:app (:var OP) L) R) and previously these var lookups failed. Examples now infer: let rec fact n = if ... in fact : Int -> Int let rec len lst = ... in len : 'a list -> Int let rec map f xs = ... in map : ('a -> 'b) -> 'a list -> 'b list 1 :: [2; 3] : Int list let rec sum lst = ... in sum [1;2;3] : Int Scoreboard refreshed: 358/358 across 14 suites. --- lib/ocaml/infer.sx | 38 +++++++++++++++++++++++++++++++++++++- lib/ocaml/scoreboard.json | 16 ++++++++-------- lib/ocaml/scoreboard.md | 14 +++++++------- lib/ocaml/test.sh | 22 ++++++++++++++++++++++ plans/ocaml-on-sx.md | 12 ++++++++++++ 5 files changed, 86 insertions(+), 16 deletions(-) diff --git a/lib/ocaml/infer.sx b/lib/ocaml/infer.sx index 081aab0d..211c880f 100644 --- a/lib/ocaml/infer.sx +++ b/lib/ocaml/infer.sx @@ -49,7 +49,19 @@ (hm-arrow a (hm-arrow a (hm-bool)))))) (a->a (let ((a (hm-tv "a"))) - (hm-scheme (list "a") (hm-arrow a a))))) + (hm-scheme (list "a") (hm-arrow a a)))) + (cons-type + (let ((a (hm-tv "a"))) + (hm-scheme (list "a") + (hm-arrow a + (hm-arrow (hm-con "list" (list a)) + (hm-con "list" (list a))))))) + (concat-type + (let ((a (hm-tv "a"))) + (hm-scheme (list "a") + (hm-arrow (hm-con "list" (list a)) + (hm-arrow (hm-con "list" (list a)) + (hm-con "list" (list a)))))))) {"+" (hm-monotype int-int-int) "-" (hm-monotype int-int-int) "*" (hm-monotype int-int-int) @@ -66,6 +78,8 @@ "&&" (hm-monotype bool-bool-bool) "||" (hm-monotype bool-bool-bool) "^" (hm-monotype str-str-str) + "::" cons-type + "@" concat-type "not" (hm-monotype (hm-arrow (hm-bool) (hm-bool))) "succ" (hm-monotype (hm-arrow (hm-int) (hm-int))) "pred" (hm-monotype (hm-arrow (hm-int) (hm-int))) @@ -142,6 +156,26 @@ (let ((s2 (get r2 :subst)) (t2 (get r2 :type))) {:subst (hm-compose s2 s1) :type t2})))))))))) +;; let-rec name params = rhs in body — bind name to a fresh tv before +;; inferring rhs, then unify the inferred rhs type with the tv. This +;; lets rhs reference name (recursive call). Generalize after. +(define ocaml-infer-let-rec + (fn (name params rhs body env counter) + (let ((rhs-expr (cond + ((= (len params) 0) rhs) + (else (list :fun params rhs)))) + (rec-tv (hm-fresh-tv counter))) + (let ((env-rec (assoc env name (hm-monotype rec-tv)))) + (let ((r1 (ocaml-infer rhs-expr env-rec counter))) + (let ((s1 (get r1 :subst)) (t1 (get r1 :type))) + (let ((s2 (ocaml-hm-unify (hm-apply s1 rec-tv) t1 s1))) + (let ((env2 (hm-apply-env s2 env))) + (let ((scheme (hm-generalize (hm-apply s2 t1) env2))) + (let ((env3 (assoc env2 name scheme))) + (let ((r2 (ocaml-infer body env3 counter))) + (let ((s3 (get r2 :subst)) (t2 (get r2 :type))) + {:subst (hm-compose s3 s2) :type t2})))))))))))) + (define ocaml-infer-if (fn (c-ast t-ast e-ast env counter) (let ((rc (ocaml-infer c-ast env counter))) @@ -381,6 +415,8 @@ ((= tag "app") (ocaml-infer-app (nth expr 1) (nth expr 2) env counter)) ((= tag "let") (ocaml-infer-let (nth expr 1) (nth expr 2) (nth expr 3) (nth expr 4) env counter)) + ((= tag "let-rec") (ocaml-infer-let-rec (nth expr 1) (nth expr 2) + (nth expr 3) (nth expr 4) env counter)) ((= tag "if") (ocaml-infer-if (nth expr 1) (nth expr 2) (nth expr 3) env counter)) ((= tag "tuple") (ocaml-infer-tuple (rest expr) env counter)) diff --git a/lib/ocaml/scoreboard.json b/lib/ocaml/scoreboard.json index 4c07725e..97cb623d 100644 --- a/lib/ocaml/scoreboard.json +++ b/lib/ocaml/scoreboard.json @@ -1,21 +1,21 @@ { "suites": { - "eval-core": {"pass": 48, "fail": 0}, + "eval-core": {"pass": 50, "fail": 0}, "let-and": {"pass": 3, "fail": 0}, - "misc": {"pass": 52, "fail": 0}, - "parser": {"pass": 87, "fail": 0}, + "misc": {"pass": 61, "fail": 0}, + "parser": {"pass": 93, "fail": 0}, "phase1-params": {"pass": 2, "fail": 0}, "phase2-exn": {"pass": 8, "fail": 0}, "phase2-function": {"pass": 3, "fail": 0}, "phase2-loops": {"pass": 4, "fail": 0}, "phase2-refs": {"pass": 6, "fail": 0}, - "phase3-adt": {"pass": 19, "fail": 0}, + "phase3-adt": {"pass": 24, "fail": 0}, "phase4-modules": {"pass": 12, "fail": 0}, - "phase5-hm": {"pass": 19, "fail": 0}, - "phase6-stdlib": {"pass": 39, "fail": 0}, + "phase5-hm": {"pass": 31, "fail": 0}, + "phase6-stdlib": {"pass": 43, "fail": 0}, "tokenize": {"pass": 18, "fail": 0} }, - "total_pass": 320, + "total_pass": 358, "total_fail": 0, - "total": 320 + "total": 358 } diff --git a/lib/ocaml/scoreboard.md b/lib/ocaml/scoreboard.md index 11161d45..00747ecb 100644 --- a/lib/ocaml/scoreboard.md +++ b/lib/ocaml/scoreboard.md @@ -1,20 +1,20 @@ # OCaml-on-SX scoreboard -320 / 320 tests passing. +358 / 358 tests passing. | Suite | Pass | Fail | |---|---:|---:| -| eval-core | 48 | 0 | +| eval-core | 50 | 0 | | let-and | 3 | 0 | -| misc | 52 | 0 | -| parser | 87 | 0 | +| misc | 61 | 0 | +| parser | 93 | 0 | | phase1-params | 2 | 0 | | phase2-exn | 8 | 0 | | phase2-function | 3 | 0 | | phase2-loops | 4 | 0 | | phase2-refs | 6 | 0 | -| phase3-adt | 19 | 0 | +| phase3-adt | 24 | 0 | | phase4-modules | 12 | 0 | -| phase5-hm | 19 | 0 | -| phase6-stdlib | 39 | 0 | +| phase5-hm | 31 | 0 | +| phase6-stdlib | 43 | 0 | | tokenize | 18 | 0 | diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index b1425436..7ebdec78 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -872,6 +872,20 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 1906) (eval "(ocaml-type-of \"fun o -> match o with | None -> 0 | Some n -> n\")") +;; ── HM let-rec inference + cons / append ────────────────────── +(epoch 2000) +(eval "(ocaml-type-of \"let rec fact n = if n = 0 then 1 else n * fact (n - 1) in fact\")") +(epoch 2001) +(eval "(ocaml-type-of \"let rec len lst = match lst with | [] -> 0 | _ :: t -> 1 + len t in len\")") +(epoch 2002) +(eval "(ocaml-type-of \"let rec map f xs = match xs with | [] -> [] | h :: t -> f h :: map f t in map\")") +(epoch 2003) +(eval "(ocaml-type-of \"1 :: [2; 3]\")") +(epoch 2004) +(eval "(ocaml-type-of \"[1] @ [2; 3]\")") +(epoch 2005) +(eval "(ocaml-type-of \"let rec sum lst = match lst with | [] -> 0 | h :: t -> h + sum t in sum [1; 2; 3]\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1379,6 +1393,14 @@ check 1904 "fun x -> Some x" ' option' check 1905 "match Some/None -> Int" '"Int"' check 1906 "Int option -> Int" '"Int option -> Int"' +# ── HM let-rec + :: / @ ───────────────────────────────────────── +check 2000 "type fact" '"Int -> Int"' +check 2001 "type len" 'list -> Int' +check 2002 "type map" 'list -> ' +check 2003 "type 1::list" '"Int list"' +check 2004 "type [1] @ [2;3]" '"Int list"' +check 2005 "type sum" '"Int"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index c90f0701..ec4b930b 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -368,6 +368,18 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5 — HM let-rec inference + `::`/`@` operator types + (+6 tests, 357 total). `ocaml-infer-let-rec` pre-binds the function + name to a fresh tv, infers rhs (which can recursively call name), + unifies inferred-rhs-type with the tv, generalizes, then infers + body. Builtin env now types `:: : 'a -> 'a list -> 'a list` and + `@ : 'a list -> 'a list -> 'a list`. Now `let rec fact …`, + `let rec map f xs = match xs with … h :: t -> f h :: map f t`, and + `let rec sum …` all infer correctly: + `fact : Int -> Int` + `len : 'a list -> Int` + `map : ('a -> 'b) -> 'a list -> 'b list` + `sum [1;2;3] : Int` - 2026-05-08 Phase 5 — HM constructor inference for option/result (+7 tests, 351 total). `ocaml-hm-ctor-env` registers None/Some (`'a opt`), Ok/Error (`('a, 'b) result`). `:con NAME` instantiates the scheme;