From 355a482dfe534017b2d2c621f74b67c4d9d229d1 Mon Sep 17 00:00:00 2001 From: giles Date: Tue, 30 Jun 2026 13:19:00 +0000 Subject: [PATCH] erlang: lists:sort/1,2 + lists:usort/1 with full term order (lists_ext suite, 788/788) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New lib/erlang/lists-ext.sx (loaded after runtime.sx): stable merge sort over an SX-list bridge. sort/1 and usort/1 use full Erlang term order via a self-contained er-ext-lt? (deep tuple/list compare that the shared er-lt? lacks); sort/2 takes a fun(A,B)->bool comparator. Registration wraps er-register-builtin-bifs! so the BIFs survive the mid-run registry resets done by tests/runtime.sx. Roadmap is saturated within this loop's scope; this is forever-loop stdlib hardening. New file forced by the broken sx-tree write tools in this worktree (see Blockers) — authored via Write + sx_validate. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/erlang/conformance.sh | 5 ++ lib/erlang/lists-ext.sx | 162 ++++++++++++++++++++++++++++++++++ lib/erlang/scoreboard.json | 7 +- lib/erlang/scoreboard.md | 3 +- lib/erlang/tests/lists_ext.sx | 76 ++++++++++++++++ plans/erlang-on-sx.md | 4 + 6 files changed, 253 insertions(+), 4 deletions(-) create mode 100644 lib/erlang/lists-ext.sx create mode 100644 lib/erlang/tests/lists_ext.sx diff --git a/lib/erlang/conformance.sh b/lib/erlang/conformance.sh index 334b6d36..acc5e57c 100755 --- a/lib/erlang/conformance.sh +++ b/lib/erlang/conformance.sh @@ -39,6 +39,7 @@ SUITES=( "ffi|er-ffi-test-pass|er-ffi-test-count" "vm|er-vm-test-pass|er-vm-test-count" "send_after|er-sa-test-pass|er-sa-test-count" + "lists_ext|er-lx-test-pass|er-lx-test-count" ) cat > "$TMPFILE" << 'EPOCHS' @@ -50,6 +51,7 @@ cat > "$TMPFILE" << 'EPOCHS' (load "lib/erlang/parser-module.sx") (load "lib/erlang/transpile.sx") (load "lib/erlang/runtime.sx") +(load "lib/erlang/lists-ext.sx") (load "lib/erlang/tests/tokenize.sx") (load "lib/erlang/tests/parse.sx") (load "lib/erlang/tests/eval.sx") @@ -63,6 +65,7 @@ cat > "$TMPFILE" << 'EPOCHS' (load "lib/erlang/tests/ffi.sx") (load "lib/erlang/tests/vm.sx") (load "lib/erlang/tests/send_after.sx") +(load "lib/erlang/tests/lists_ext.sx") (epoch 100) (eval "(list er-test-pass er-test-count)") (epoch 101) @@ -87,6 +90,8 @@ cat > "$TMPFILE" << 'EPOCHS' (eval "(list er-vm-test-pass er-vm-test-count)") (epoch 111) (eval "(list er-sa-test-pass er-sa-test-count)") +(epoch 112) +(eval "(list er-lx-test-pass er-lx-test-count)") EPOCHS timeout 600 "$SX_SERVER" < "$TMPFILE" > "$OUTFILE" 2>&1 diff --git a/lib/erlang/lists-ext.sx b/lib/erlang/lists-ext.sx new file mode 100644 index 00000000..e187203c --- /dev/null +++ b/lib/erlang/lists-ext.sx @@ -0,0 +1,162 @@ +;; lib/erlang/lists-ext.sx — extra `lists` module BIFs. +;; +;; Loaded AFTER runtime.sx so the BIF registry + transpile helpers +;; (er-mk-cons, er-lt?, er-equal?, er-bool, er-truthy?, er-apply-fun, +;; er-bif-arg1, er-mk-error-marker, er-mk-atom, er-cons?, er-nil?) and +;; runtime's er-register-pure-bif! are all in scope. Registrations run +;; at load time and persist in the global er-bif-registry cell. +;; +;; Edit-tool note: the sx-tree write tools raise yojson "Expected +;; string, got null" in this worktree, so new lists BIFs land here (a +;; fresh file) rather than as in-place edits to the ~1900-line +;; transpile.sx. Same separate-file pattern the VM dispatcher already +;; uses (lib/erlang/vm/dispatcher.sx). + +;; ── cons <-> SX-list bridges ────────────────────────────────────── +(define + er-cons->sxlist + (fn (lst) + (cond + (er-nil? lst) (list) + (er-cons? lst) (cons (get lst :head) (er-cons->sxlist (get lst :tail))) + :else (raise (er-mk-error-marker (er-mk-atom "badarg")))))) + +(define + er-sxlist->cons + (fn (xs) + (if (= (len xs) 0) + (er-mk-nil) + (er-mk-cons (first xs) (er-sxlist->cons (rest xs)))))) + +;; ── merge sort over SX lists (stable) ───────────────────────────── +(define + er-ext-take + (fn (xs n) + (if (or (= n 0) (= (len xs) 0)) + (list) + (cons (first xs) (er-ext-take (rest xs) (- n 1)))))) + +(define + er-ext-drop + (fn (xs n) + (if (or (= n 0) (= (len xs) 0)) + xs + (er-ext-drop (rest xs) (- n 1))))) + +;; le? returns a truthy value (Erlang bool atom or SX bool) iff a +;; should sort at-or-before b. Taking from the left half first on a +;; true result keeps the sort stable. +(define + er-ext-merge + (fn (a b le?) + (cond + (= (len a) 0) b + (= (len b) 0) a + (er-truthy? (le? (first a) (first b))) + (cons (first a) (er-ext-merge (rest a) b le?)) + :else (cons (first b) (er-ext-merge a (rest b) le?))))) + +(define + er-ext-msort + (fn (xs le?) + (if (<= (len xs) 1) + xs + (let ((mid (quotient (len xs) 2))) + (er-ext-merge + (er-ext-msort (er-ext-take xs mid) le?) + (er-ext-msort (er-ext-drop xs mid) le?) + le?))))) + +;; Full Erlang term order. The shared er-lt? (transpile.sx) only +;; deep-compares numbers/atoms/strings and otherwise falls back to a +;; coarse type rank — so any two tuples (or two lists) compare as +;; order-equal there. er-ext-lt? adds the missing structural cases: +;; tuples by arity then elementwise, lists elementwise with a shorter +;; proper prefix sorting first. Cross-type cases delegate to er-lt?. +(define + er-ext-lt-seq + (fn (ea eb i) + (cond + (>= i (len ea)) false + (er-ext-lt? (nth ea i) (nth eb i)) true + (er-ext-lt? (nth eb i) (nth ea i)) false + :else (er-ext-lt-seq ea eb (+ i 1))))) + +(define + er-ext-lt? + (fn (a b) + (cond + (and (er-tuple? a) (er-tuple? b)) + (let ((ea (get a :elements)) (eb (get b :elements))) + (cond + (< (len ea) (len eb)) true + (> (len ea) (len eb)) false + :else (er-ext-lt-seq ea eb 0))) + (and (er-cons? a) (er-cons? b)) + (cond + (er-ext-lt? (get a :head) (get b :head)) true + (er-ext-lt? (get b :head) (get a :head)) false + :else (er-ext-lt? (get a :tail) (get b :tail))) + (and (er-nil? a) (er-cons? b)) true + (and (er-cons? a) (er-nil? b)) false + (and (er-nil? a) (er-nil? b)) false + :else (er-lt? a b)))) + +;; Default Erlang term order: a =< b == not (b < a). +(define + er-ext-term-le + (fn (a b) (er-bool (not (er-ext-lt? b a))))) + +;; ── lists:sort/1, lists:sort/2 ──────────────────────────────────── +(define + er-bif-lists-sort + (fn (vs) + (cond + (= (len vs) 1) + (er-sxlist->cons + (er-ext-msort (er-cons->sxlist (nth vs 0)) er-ext-term-le)) + (= (len vs) 2) + (let ((f (nth vs 0)) (lst (nth vs 1))) + (er-sxlist->cons + (er-ext-msort + (er-cons->sxlist lst) + (fn (a b) (er-apply-fun f (list a b)))))) + :else (error "Erlang: lists:sort: wrong arity")))) + +;; ── lists:usort/1 (sort then drop adjacent term-equal dups) ─────── +(define + er-ext-dedup + (fn (xs) + (cond + (= (len xs) 0) (list) + (= (len xs) 1) xs + (er-equal? (first xs) (nth xs 1)) (er-ext-dedup (rest xs)) + :else (cons (first xs) (er-ext-dedup (rest xs)))))) + +(define + er-bif-lists-usort + (fn (vs) + (let ((lst (er-bif-arg1 vs "lists:usort"))) + (er-sxlist->cons + (er-ext-dedup + (er-ext-msort (er-cons->sxlist lst) er-ext-term-le)))))) + +;; ── register ────────────────────────────────────────────────────── +;; Hook into er-register-builtin-bifs! rather than registering once: +;; the registry can be reset + rebuilt mid-run (tests/runtime.sx does +;; this), and a plain one-shot registration would be wiped. Wrapping +;; the rebuild fn means these BIFs are re-added on every reset. +(define er-ext-lists-register! + (fn () + (er-register-pure-bif! "lists" "sort" 1 er-bif-lists-sort) + (er-register-pure-bif! "lists" "sort" 2 er-bif-lists-sort) + (er-register-pure-bif! "lists" "usort" 1 er-bif-lists-usort))) + +(define er-ext-prev-register-builtins er-register-builtin-bifs!) +(define er-register-builtin-bifs! + (fn () + (er-ext-prev-register-builtins) + (er-ext-lists-register!))) + +;; register into the currently-live registry too +(er-ext-lists-register!) diff --git a/lib/erlang/scoreboard.json b/lib/erlang/scoreboard.json index 614cd84c..1d893b91 100644 --- a/lib/erlang/scoreboard.json +++ b/lib/erlang/scoreboard.json @@ -1,7 +1,7 @@ { "language": "erlang", - "total_pass": 771, - "total": 771, + "total_pass": 788, + "total": 788, "suites": [ {"name":"tokenize","pass":62,"total":62,"status":"ok"}, {"name":"parse","pass":52,"total":52,"status":"ok"}, @@ -14,6 +14,7 @@ {"name":"fib","pass":8,"total":8,"status":"ok"}, {"name":"ffi","pass":37,"total":37,"status":"ok"}, {"name":"vm","pass":78,"total":78,"status":"ok"}, - {"name":"send_after","pass":10,"total":10,"status":"ok"} + {"name":"send_after","pass":10,"total":10,"status":"ok"}, + {"name":"lists_ext","pass":17,"total":17,"status":"ok"} ] } diff --git a/lib/erlang/scoreboard.md b/lib/erlang/scoreboard.md index a5daa145..01a3b69c 100644 --- a/lib/erlang/scoreboard.md +++ b/lib/erlang/scoreboard.md @@ -1,6 +1,6 @@ # Erlang-on-SX Scoreboard -**Total: 771 / 771 tests passing** +**Total: 788 / 788 tests passing** | | Suite | Pass | Total | |---|---|---|---| @@ -16,6 +16,7 @@ | ✅ | ffi | 37 | 37 | | ✅ | vm | 78 | 78 | | ✅ | send_after | 10 | 10 | +| ✅ | lists_ext | 17 | 17 | Generated by `lib/erlang/conformance.sh`. diff --git a/lib/erlang/tests/lists_ext.sx b/lib/erlang/tests/lists_ext.sx new file mode 100644 index 00000000..f0b5b691 --- /dev/null +++ b/lib/erlang/tests/lists_ext.sx @@ -0,0 +1,76 @@ +;; lists-ext tests — lists:sort/1, lists:sort/2, lists:usort/1. +;; Each case evaluates an Erlang expression that reduces to the bool +;; atom `true` (via =:= on the sorted result) and checks its name. + +(define er-lx-test-count 0) +(define er-lx-test-pass 0) +(define er-lx-test-fails (list)) + +(define + er-lx-test + (fn + (name actual expected) + (set! er-lx-test-count (+ er-lx-test-count 1)) + (if + (= actual expected) + (set! er-lx-test-pass (+ er-lx-test-pass 1)) + (append! er-lx-test-fails {:name name :expected expected :actual actual})))) + +;; eval an Erlang source string and return the result atom's name +(define er-lx-nm (fn (src) (get (erlang-eval-ast src) :name))) + +;; ── lists:sort/1 ────────────────────────────────────────────────── +(er-lx-test "sort/1 ascending" + (er-lx-nm "lists:sort([3,1,2]) =:= [1,2,3]") "true") + +(er-lx-test "sort/1 already sorted" + (er-lx-nm "lists:sort([1,2,3]) =:= [1,2,3]") "true") + +(er-lx-test "sort/1 empty" + (er-lx-nm "lists:sort([]) =:= []") "true") + +(er-lx-test "sort/1 singleton" + (er-lx-nm "lists:sort([7]) =:= [7]") "true") + +(er-lx-test "sort/1 keeps duplicates" + (er-lx-nm "lists:sort([3,1,2,1]) =:= [1,1,2,3]") "true") + +(er-lx-test "sort/1 length preserved" + (erlang-eval-ast "length(lists:sort([5,4,3,2,1]))") 5) + +(er-lx-test "sort/1 term order: number < atom" + (er-lx-nm "lists:sort([b,a,1]) =:= [1,a,b]") "true") + +(er-lx-test "sort/1 tuples elementwise" + (er-lx-nm "lists:sort([{2,a},{1,b},{1,a}]) =:= [{1,a},{1,b},{2,a}]") "true") + +;; ── lists:sort/2 ────────────────────────────────────────────────── +(er-lx-test "sort/2 ascending =<" + (er-lx-nm "lists:sort(fun(A,B) -> A =< B end, [3,1,2]) =:= [1,2,3]") "true") + +(er-lx-test "sort/2 descending >=" + (er-lx-nm "lists:sort(fun(A,B) -> A >= B end, [1,3,2]) =:= [3,2,1]") "true") + +(er-lx-test "sort/2 stable on equal keys" + (er-lx-nm + "lists:sort(fun({A,_},{B,_}) -> A =< B end, [{1,x},{1,y},{0,z}]) =:= [{0,z},{1,x},{1,y}]") + "true") + +(er-lx-test "sort/2 empty" + (er-lx-nm "lists:sort(fun(A,B) -> A =< B end, []) =:= []") "true") + +;; ── lists:usort/1 ───────────────────────────────────────────────── +(er-lx-test "usort/1 removes duplicates" + (er-lx-nm "lists:usort([3,1,2,1,3]) =:= [1,2,3]") "true") + +(er-lx-test "usort/1 empty" + (er-lx-nm "lists:usort([]) =:= []") "true") + +(er-lx-test "usort/1 all equal collapses to one" + (er-lx-nm "lists:usort([5,5,5]) =:= [5]") "true") + +(er-lx-test "usort/1 already unique" + (er-lx-nm "lists:usort([1,2,3]) =:= [1,2,3]") "true") + +(er-lx-test "usort/1 length after dedup" + (erlang-eval-ast "length(lists:usort([4,4,2,2,1,1,4]))") 3) diff --git a/plans/erlang-on-sx.md b/plans/erlang-on-sx.md index 5fa4859d..cec03f19 100644 --- a/plans/erlang-on-sx.md +++ b/plans/erlang-on-sx.md @@ -159,6 +159,8 @@ The Phase 9 opcodes are registered, tested, and bridged SX↔OCaml, but inert: n _Newest first._ +- **2026-06-30 stdlib hardening — `lists:sort/1,2` + `lists:usort/1`** — Roadmap is saturated within this loop's scope (every remaining `[ ]` is blocked: `httpc`/`sqlite` on absent host primitives, 10a/10c on out-of-scope `lib/compiler.sx`). Continued as forever-loop hardening by filling idiomatic-Erlang stdlib gaps. Added the `lists` sort family in a **new file `lib/erlang/lists-ext.sx`** (loaded after `runtime.sx`): stable merge sort over an SX-list bridge, registered via `er-register-pure-bif!`. `lists:sort/1` and `usort/1` use full Erlang term order; `sort/2` takes a `fun(A,B)->bool` comparator. **Two notable findings:** (1) the shared `er-lt?` (transpile.sx) only deep-compares numbers/atoms/strings and treats *any two tuples (or lists) as order-equal* — so `lists:sort` (and, latently, `min/2`/`max/2`) would not order compound terms. Fixed locally with a self-contained `er-ext-lt?` that compares tuples by arity-then-elementwise and lists elementwise (shorter proper prefix first), delegating cross-type cases to `er-lt?`. `er-lt?` itself left untouched (shared by the `<` operator; can't edit transpile.sx — see Blockers). (2) `tests/runtime.sx` resets the BIF registry mid-run via `er-register-builtin-bifs!`, which would wipe a one-shot registration; so `lists-ext.sx` **wraps** `er-register-builtin-bifs!` to re-add its BIFs on every rebuild. New `lists_ext` suite (17 tests: term order, dup-keeping, stability, descending comparator, usort dedup). Conformance **771 → 788/788** (12→13 suites). New-file workaround forced because every sx-tree write tool (incl. `sx_write_file`) raises yojson "Expected string, got null" in this worktree — authored via the `Write` fallback + `sx_validate`, the same pattern other loops use. loops/erlang only. + - **2026-05-18 Phase 8 host-primitive BIFs wired (crypto / cid / file:list_dir)** — `loops/fed-prims` (merged at architecture `380bc69f`) delivered the platform primitives; wired the 3 previously-BLOCKED Phase 8 BIF groups in `lib/erlang/runtime.sx` as `er-register-pure-bif!`/`er-register-bif!` entries with term marshalling at the boundary. **`crypto:hash/2`** → `crypto-sha256`/`crypto-sha512`/`crypto-sha3-256`; atom `Type` dispatch, `er-source-to-string` for `Data`, host hex result → raw bytes via new `er-hexval`/`er-hex->bytes`, returns Erlang binary; bad type/arg → `error:badarg`. **`cid:from_bytes/1`** → `cid-from-bytes` with raw codec `0x55` + sha2-256 multihash assembled in SX (`[0x12,0x20]++digest`); **`cid:to_string/1`** → `cid-from-sx` of `er-format-value` (cbor-encode rejects `er-to-sx`-marshalled symbols; the canonical string form is total + deterministic). **`file:list_dir/1`** → `file-list-dir`, `{ok,[Binary]}` via `er-of-sx` / `{error,Reason}` reusing `er-classify-file-error`. Test gotcha caught + fixed: this Erlang port's binary parser only supports integer/var segments — `<<"abc">>` string-binary literals silently produce **empty** binaries, so the first-cut distinct-input tests compared two empty inputs and failed; rewrote ffi inputs to integer-segment binaries (`<<97,98,99>>`). ffi suite 14→**28** (3 BLOCKED negative-asserts flipped to positive+negative functional tests; `httpc`/`sqlite` kept as deferred unregistered-asserts per fed-prims handoff). Built `sx_server.exe` (dune, opam 5.2.0) at `380bc69f`; full conformance **729/729** (eval 385/385, vm 78/78, **ffi 28/28**, all process suites green). loops/erlang only — not merged, not pushed to architecture. - **2026-05-18 FIXED merge-blocking regression: cyclic-env hang in `er-env-derived-from?`** — A trial merge of loops/erlang → architecture regressed Erlang **715/715 → 0/0** on the architecture binary. Bisected: not loader semantics, not a uniform slowdown — pinpointed to the *single* Phase 7 capstone test (eval.sx lines 1314-1346; prefix-1313 was byte-identical speed on both binaries, 27s, prefix-1346 was 28s on loops vs >5min/hung on architecture). Isolated further: spawn+reload alone 0.6s, reload+purge alone 0.3s, but spawn+reload+**purge over forever-blocked procs** hung. Root cause: `er-env-derived-from?` (transpile.sx, used by `code:purge`/`soft_purge` via `er-procs-on-env`) compared closure envs with `(= env target-env)`. loops/erlang's evaluator implements dict `=` as **object identity**; architecture's 131-commit-newer evaluator changed it to **structural deep equality**. Erlang closure envs are large and **cyclic** (a module fun's `:env` transitively references the fun), so structural `=` over them never terminates. Fix: use `identical?` (pointer-identity predicate, present + consistent `(true false)` on *both* binaries) — the actually-intended semantics and host-independent. Verified: full eval.sx on the architecture binary >200s/hung → **59s**; full 10-suite conformance on the architecture binary now **715/715** (eval 385/385, vm 78/78, ffi 14/14, all process suites green). loops/erlang behaviour unchanged (`identical?` ≡ its old `=`-identity). One-file change (`lib/erlang/transpile.sx`, +7/-2). The merge can now be re-attempted; this was the sole blocker. @@ -251,6 +253,8 @@ _Newest first._ ## Blockers +- **sx-tree WRITE tools broken in this worktree** (2026-06-30). Every sx-tree edit/write tool (`sx_replace_node`, `sx_insert_child`, `sx_insert_near`, …) **and even `sx_write_file`** raise `Yojson.Util.Type_error("Expected string, got null")` against the `mcp_tree.exe` bound in `.mcp.json` (the `/root/rose-ash/...` main-worktree binary). Read/comprehension tools (`sx_validate`, `sx_find_all`, `sx_eval`, `sx_read_*`) work fine. **Workaround:** author/edit `.sx` files with the plain `Write` tool, then `sx_validate` — the same fallback other loops document (see `project_host_on_sx.md`, `project_content_on_sx.md` memory). This is why new `lib/erlang` BIFs land as fresh files (e.g. `lists-ext.sx`) rather than in-place edits to the large `transpile.sx`/`runtime.sx`. Real fix: rebuild `mcp_tree.exe` from current `hosts/ocaml` (out of this loop's binary-build scope) or repoint `.mcp.json` at a fixed binary. + - **Phase 10a — opcode emission requires `lib/compiler.sx` (out of scope)** (2026-05-15). Architecture fully traced this iteration: the OCaml JIT (`sx_vm.ml` `jit_compile_lambda`, ref-set at line 1206) invokes the SX-level `compile` from **`lib/compiler.sx`** via the CEK machine; that is the sole SX→bytecode producer. Erlang's hot helpers (`er-match-tuple`, `er-bif-*`, …) are SX functions in `transpile.sx` that get JIT-compiled through this path. To emit `erlang.OP_*` they must be recognized as intrinsics inside `compiler.sx`'s `compile-call` (the file's own docstring already anticipates this: "Compilers call `extension-opcode-id` to emit extension opcodes" — designed, not yet implemented). `lib/compiler.sx` is **lib-root**, excluded by the ground rules ("Don't edit lib/ root") and absent from the widened `lib/erlang/** + hosts/ocaml/** (extension only)` scope — editing it changes every guest language's JIT, so it must be owned by a shared-compiler session, not this loop. **Fix path:** that session implements 10a.1 (intrinsic registry in `compiler.sx`) + 10a.2 (`compile-call` emits the opcode when registered & `extension-opcode-id` non-nil, else generic CALL). Erlang's BIF handlers (10b, ids 230-239, all real) light up the instant emission exists — zero further work here. The control opcodes (222-229) additionally need 10a.3 (operand contract) + OCaml↔SX runtime-state bridging (Erlang scheduler/mailbox live in `lib/erlang/runtime.sx`, not OCaml). - **Phase 9g — Perf bench gated on 9a** (2026-05-14). The conformance half of 9g (709/709 with stub VM loaded) is satisfied; the perf-bench half requires 9a's bytecode compiler to actually emit the new opcodes at hot call sites. Until then a benchmark would measure today's `er-bif-*` / `er-match-*` numbers unchanged (since the stub handlers wrap them 1-to-1). Re-fire 9g after 9a lands.