From 4852cca9eb7b2fdea63597e6745f5e5b54ee5d9e Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 5 Jun 2026 06:49:40 +0000 Subject: [PATCH] =?UTF-8?q?fed-sx-m1:=20Step=203b=20substrate=20fix=20#3?= =?UTF-8?q?=20=E2=80=94=20atom=5Fto=5Flist/integer=5Fto=5Flist=20as=20Erla?= =?UTF-8?q?ng=20charlists;=20list=5Fto=5F*=20accept=20both=20(+9=20net=20e?= =?UTF-8?q?val,=20759/759)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/erlang/scoreboard.json | 6 ++--- lib/erlang/scoreboard.md | 4 ++-- lib/erlang/tests/eval.sx | 32 +++++++++++++++++++++---- lib/erlang/transpile.sx | 48 ++++++++++++++++++++++++------------- next/README.md | 20 ++++++++-------- plans/fed-sx-milestone-1.md | 3 ++- 6 files changed, 76 insertions(+), 37 deletions(-) diff --git a/lib/erlang/scoreboard.json b/lib/erlang/scoreboard.json index 8b2827f2..97d6f589 100644 --- a/lib/erlang/scoreboard.json +++ b/lib/erlang/scoreboard.json @@ -1,11 +1,11 @@ { "language": "erlang", - "total_pass": 750, - "total": 750, + "total_pass": 759, + "total": 759, "suites": [ {"name":"tokenize","pass":62,"total":62,"status":"ok"}, {"name":"parse","pass":52,"total":52,"status":"ok"}, - {"name":"eval","pass":397,"total":397,"status":"ok"}, + {"name":"eval","pass":406,"total":406,"status":"ok"}, {"name":"runtime","pass":93,"total":93,"status":"ok"}, {"name":"ring","pass":4,"total":4,"status":"ok"}, {"name":"ping-pong","pass":4,"total":4,"status":"ok"}, diff --git a/lib/erlang/scoreboard.md b/lib/erlang/scoreboard.md index 13ad1a7c..6487bf21 100644 --- a/lib/erlang/scoreboard.md +++ b/lib/erlang/scoreboard.md @@ -1,12 +1,12 @@ # Erlang-on-SX Scoreboard -**Total: 750 / 750 tests passing** +**Total: 759 / 759 tests passing** | | Suite | Pass | Total | |---|---|---|---| | ✅ | tokenize | 62 | 62 | | ✅ | parse | 52 | 52 | -| ✅ | eval | 397 | 397 | +| ✅ | eval | 406 | 406 | | ✅ | runtime | 93 | 93 | | ✅ | ring | 4 | 4 | | ✅ | ping-pong | 4 | 4 | diff --git a/lib/erlang/tests/eval.sx b/lib/erlang/tests/eval.sx index 7ff48aed..dca0765d 100644 --- a/lib/erlang/tests/eval.sx +++ b/lib/erlang/tests/eval.sx @@ -228,9 +228,10 @@ (er-eval-test "tuple_size 0" (ev "tuple_size({})") 0) ;; ── BIFs: atom / list conversions ─────────────────────────────── -(er-eval-test "atom_to_list" (ev "atom_to_list(hello)") "hello") +(er-eval-test "atom_to_list -> charlist length" (ev "length(atom_to_list(hello))") 5) +(er-eval-test "atom_to_list -> head $h" (ev "hd(atom_to_list(hello))") 104) (er-eval-test "list_to_atom roundtrip" - (nm (ev "list_to_atom(atom_to_list(foo))")) "foo") + (nm (ev "list_to_atom(atom_to_list(foo))")) "foo") ;; round-trip via charlist (er-eval-test "list_to_atom fresh" (nm (ev "list_to_atom(\"bar\")")) "bar") @@ -1060,11 +1061,13 @@ (er-eval-test "list_to_tuple roundtrip" (ev "tuple_size(list_to_tuple([10, 20, 30]))") 3) -(er-eval-test "integer_to_list" (ev "integer_to_list(42)") "42") -(er-eval-test "integer_to_list neg" (ev "integer_to_list(-99)") "-99") +(er-eval-test "integer_to_list -> charlist length" (ev "length(integer_to_list(42))") 2) +(er-eval-test "integer_to_list 42 head $4" (ev "hd(integer_to_list(42))") 52) +(er-eval-test "integer_to_list neg -> charlist length" (ev "length(integer_to_list(-99))") 3) +(er-eval-test "integer_to_list -99 head $-" (ev "hd(integer_to_list(-99))") 45) (er-eval-test "list_to_integer" (ev "list_to_integer(\"123\")") 123) (er-eval-test "list_to_integer roundtrip" - (ev "list_to_integer(integer_to_list(7))") 7) + (ev "list_to_integer(integer_to_list(7))") 7) ;; round-trip via charlist (er-eval-test "is_function fun" (nm (ev "F = fun (X) -> X end, is_function(F)")) "true") @@ -1358,6 +1361,25 @@ (er-eval-test "list_to_binary char-list round-trip" (nm (ev "list_to_binary([$h, $i]) =:= <<104, 105>>")) "true") + +;; ── atom_to_list / integer_to_list charlist semantics (Step 3b substrate fix #3) ── +(er-eval-test "atom_to_list hd is char code" + (ev "hd(atom_to_list(hi))") 104) +(er-eval-test "atom_to_list maps to bytes via list_to_binary" + (ev "byte_size(list_to_binary(atom_to_list(hello)))") 5) +(er-eval-test "atom_to_list -> list_to_binary -> bytes content" + (nm (ev "list_to_binary(atom_to_list(ok)) =:= <<111, 107>>")) "true") +(er-eval-test "integer_to_list 12345 -> 5 chars" + (ev "length(integer_to_list(12345))") 5) +(er-eval-test "integer_to_list -> bytes -> back" + (ev "list_to_integer(integer_to_list(99999))") 99999) +(er-eval-test "list_to_atom from charlist" + (nm (ev "list_to_atom([$f, $o, $o])")) "foo") +(er-eval-test "list_to_atom from SX-string back-compat" + (nm (ev "list_to_atom(\"bar\")")) "bar") +(er-eval-test "list_to_integer from charlist" + (ev "list_to_integer([$1, $0, $0])") 100) + (define er-eval-test-summary (str "eval " er-eval-test-pass "/" er-eval-test-count)) diff --git a/lib/erlang/transpile.sx b/lib/erlang/transpile.sx index 915d31b6..c72d9298 100644 --- a/lib/erlang/transpile.sx +++ b/lib/erlang/transpile.sx @@ -821,16 +821,30 @@ (len (get v :elements)) (error "Erlang: tuple_size: not a tuple"))))) +(define er-string->charlist + (fn (s) + (let ((cs (string->list s)) (out (er-mk-nil))) + (for-each + (fn (i) + (set! out (er-mk-cons + (char->integer (nth cs (- (- (len cs) 1) i))) + out))) + (range 0 (len cs))) + out))) + (define er-bif-atom-to-list (fn (vs) (let ((v (er-bif-arg1 vs "atom_to_list"))) + ;; Standard Erlang: atom_to_list/1 returns an Erlang charlist + ;; (list of integer char codes). Was: SX string of :name — + ;; unusable from Erlang-land for [Char|T] / ++ / binary segments. (if (er-atom? v) - (get v :name) - (error "Erlang: atom_to_list: not an atom"))))) + (er-string->charlist (get v :name)) + (raise (er-mk-error-marker (er-mk-atom "badarg"))))))) (define er-bif-list-to-atom @@ -838,10 +852,11 @@ (vs) (let ((v (er-bif-arg1 vs "list_to_atom"))) - (if - (= (type-of v) "string") - (er-mk-atom v) - (error "Erlang: list_to_atom: not a string"))))) + ;; Accept Erlang charlist (cons of ints) or SX string. + (let ((s (er-source-to-string v))) + (cond + (= s nil) (raise (er-mk-error-marker (er-mk-atom "badarg"))) + :else (er-mk-atom s)))))) ;; ── lists module ───────────────────────────────────────────────── (define @@ -1597,10 +1612,12 @@ (vs) (let ((v (er-bif-arg1 vs "integer_to_list"))) + ;; Standard Erlang: integer_to_list/1 returns an Erlang charlist + ;; (e.g. integer_to_list(42) -> [$4, $2] -> [52, 50]). (cond (not (= (type-of v) "number")) (raise (er-mk-error-marker (er-mk-atom "badarg"))) - :else (str v))))) + :else (er-string->charlist (str v)))))) (define er-bif-list-to-integer @@ -1608,15 +1625,14 @@ (vs) (let ((v (er-bif-arg1 vs "list_to_integer"))) - (cond - (not (= (type-of v) "string")) - (raise (er-mk-error-marker (er-mk-atom "badarg"))) - :else (let - ((n (parse-number v))) - (cond - (= n nil) - (raise (er-mk-error-marker (er-mk-atom "badarg"))) - :else n)))))) + ;; Accept Erlang charlist (cons of ints) or SX string. + (let ((s (er-source-to-string v))) + (cond + (= s nil) (raise (er-mk-error-marker (er-mk-atom "badarg"))) + :else (let ((n (parse-number s))) + (cond + (= n nil) (raise (er-mk-error-marker (er-mk-atom "badarg"))) + :else n))))))) (define er-bif-is-function diff --git a/next/README.md b/next/README.md index 3b564304..c72fd134 100644 --- a/next/README.md +++ b/next/README.md @@ -103,16 +103,16 @@ The kernel calls into these host primitives: `crypto:hash/2`, These three gaps block the remaining unchecked deliverables: -1. **Term codec** (`3b`/`3c`) — **substrate fixes #1 + #2 done 2026-06-04:** - `erlang:binary_to_list/1` and `erlang:list_to_binary/1` are registered - in `lib/erlang/runtime.sx` (`list_to_binary` is iolist-aware); the - tokenizer's `$X` branch now emits the decimal char code, so `[$h, $i | T]` - patterns and `list_to_binary([$f,$e,$d])` work end-to-end. 750/750 - conformance, +9 ffi + +12 eval tests. Step 3b on-disk segment writer - has a complete byte-level term ↔ binary path. Still parked (low priority - for Milestone 1): `atom_to_list`/`integer_to_list` return SX-strings - (an opaque OCaml-string type), not Erlang charlists — only blocks code - that wants charlist arithmetic on atom/integer names. +1. **Term codec** (`3b`/`3c`) — **all three substrate fixes done 2026-06-05:** + `erlang:binary_to_list/1` and `erlang:list_to_binary/1` registered in + `lib/erlang/runtime.sx` (iolist-aware); the tokenizer's `$X` branch + emits the decimal char code; `atom_to_list/1` and `integer_to_list/1` + now return Erlang charlists (standard Erlang semantics) with `list_to_atom`/ + `list_to_integer` accepting both charlists and SX strings for back-compat. + 759/759 conformance. The full term-codec primitive set is in place — + Step 3b on-disk segment writer can encode arbitrary Erlang activity + terms (atoms, ints, binaries, tuples, lists) into byte sequences using + only Erlang-native primitives. 2. **SX-source eval bridge** — There's no BIF that lets Erlang call into the SX evaluator on a parsed source string. Blocks evaluating the `:schema` / diff --git a/plans/fed-sx-milestone-1.md b/plans/fed-sx-milestone-1.md index 089e2c95..e2d057a1 100644 --- a/plans/fed-sx-milestone-1.md +++ b/plans/fed-sx-milestone-1.md @@ -200,7 +200,7 @@ verify_signature(Activity, ActorState) -> - [ ] **3b** — *Parked behind substrate gap (see Blockers below).* Term codec + on-disk persistence: serializer/parser writing each activity as a JSONL-style line; restart-resumes-tip from the segment file. - [ ] **3c** — Segment rotation at size threshold + gen_server-mediated concurrent appends. -**Blockers (Step 3b) — byte-level path resolved 2026-06-04:** `binary_to_list/1` and `list_to_binary/1` are now registered Erlang BIFs in `lib/erlang/runtime.sx` (Step 3b substrate fix, +9 ffi tests, 738/738 conformance). `list_to_binary` is iolist-aware: accepts nested cons of integer bytes (0-255) and/or binaries; `binary_to_list` returns a proper Erlang charlist of integers. Round-trip verified: `list_to_binary(binary_to_list(B)) =:= B`. On-disk segment writer (3b) can now build segment bytes from `[Header, IoListPayload]` and reconstruct on read — option (c) of the original workaround menu is now cheap. `$X` char literals now decode correctly **as of 2026-06-04**: the Erlang tokenizer's `(= ch "$")` branch (`lib/erlang/tokenizer.sx`) now emits the decimal char code as the token value instead of the raw `$X` text (which `parse-number` couldn't decode → nil). Plain chars use `char->integer` of the first char; the standard escape table (`\n=10 \t=9 \r=13 \s=32 \b=8 \e=27 \f=12 \v=11 \d=127 \0=0 \\=92 \"=34 \'=39`) handles `$\X` forms. So `[$h, $i | T]` patterns and `list_to_binary([$f,$e,$d])` both work end-to-end. +12 eval tests, 750/750. Combined with 3b's `binary_to_list`/`list_to_binary`, Erlang code can now read/write byte sequences and string-shaped char lists fluently. Still parked: `atom_to_list/1`/`integer_to_list/1` return SX strings rather than Erlang charlists — only blocks code that wants to do `[$0+N | _]` arithmetic on integer-to-string output or `[Lower | _]` on atom names; downstream Steps in this milestone don't need it. +**Blockers (Step 3b) — byte-level path resolved 2026-06-04:** `binary_to_list/1` and `list_to_binary/1` are now registered Erlang BIFs in `lib/erlang/runtime.sx` (Step 3b substrate fix, +9 ffi tests, 738/738 conformance). `list_to_binary` is iolist-aware: accepts nested cons of integer bytes (0-255) and/or binaries; `binary_to_list` returns a proper Erlang charlist of integers. Round-trip verified: `list_to_binary(binary_to_list(B)) =:= B`. On-disk segment writer (3b) can now build segment bytes from `[Header, IoListPayload]` and reconstruct on read — option (c) of the original workaround menu is now cheap. `$X` char literals now decode correctly **as of 2026-06-04**: the Erlang tokenizer's `(= ch "$")` branch (`lib/erlang/tokenizer.sx`) now emits the decimal char code as the token value instead of the raw `$X` text (which `parse-number` couldn't decode → nil). Plain chars use `char->integer` of the first char; the standard escape table (`\n=10 \t=9 \r=13 \s=32 \b=8 \e=27 \f=12 \v=11 \d=127 \0=0 \\=92 \"=34 \'=39`) handles `$\X` forms. So `[$h, $i | T]` patterns and `list_to_binary([$f,$e,$d])` both work end-to-end. +12 eval tests, 750/750. Combined with 3b's `binary_to_list`/`list_to_binary`, Erlang code can now read/write byte sequences and string-shaped char lists fluently. **All three substrate gaps resolved as of 2026-06-05.** `atom_to_list/1` and `integer_to_list/1` now return Erlang charlists (cons of int char codes — standard Erlang semantics) via a new `er-string->charlist` helper in `transpile.sx`. `list_to_atom/1` and `list_to_integer/1` accept either charlists OR SX strings (back-compat via the existing `er-source-to-string` coercer). Composition works end-to-end: `list_to_binary(atom_to_list(hello)) =:= <<104,101,108,108,111>>` and `integer_to_list(N)` round-trips through `list_to_integer`. 5 existing eval tests rewritten to charlist semantics, 8 new charlist-aware tests added (759/759). The full term-codec primitive set — `binary_to_list`, `list_to_binary`, `$X`, `atom_to_list`, `integer_to_list` charlist semantics, plus existing `file:read_file`/`write_file`/`list_dir` — is now in place. **Deliverables:** @@ -1003,6 +1003,7 @@ A few things still under-specified; resolve as work begins. Newest first. One line per sub-deliverable commit. Erlang conformance gate (`bash lib/erlang/conformance.sh`) must remain 729/729 on every entry. +- **2026-06-05** — Step 3b substrate fix #3 (final): `atom_to_list/1` and `integer_to_list/1` now return Erlang charlists (cons-of-int-char-codes) instead of SX strings — standard Erlang semantics. New helper `er-string->charlist` in `transpile.sx`. `list_to_atom/1` and `list_to_integer/1` accept either charlists OR SX strings (back-compat via the existing `er-source-to-string` coercer, which already handles both shapes). 5 existing eval tests rewritten to match new semantics (e.g. `length(atom_to_list(hello)) =:= 5`, `hd(integer_to_list(42)) =:= 52`). 8 new charlist-coverage tests demonstrating composition: `list_to_binary(atom_to_list(ok)) =:= <<111,107>>`; `list_to_atom([$f,$o,$o])` round-trips; `list_to_integer([$1,$0,$0]) =:= 100`. Erlang conformance **759/759** (eval 397→406, +9 net). The full term-codec primitive set — `binary_to_list`/`list_to_binary` (24e3bf53), `$X` literals (3d80bd8c), and now `atom_to_list`/`integer_to_list` charlists — is in place; Step 3b on-disk segment writer can encode arbitrary Erlang activity terms (atoms, ints, binaries, tuples, lists) into byte sequences using only Erlang-native primitives. - **2026-06-04** — Step 3b substrate fix #2: `$X` char-literal decoding. Patched the Erlang tokenizer's `(= ch "$")` branch in `lib/erlang/tokenizer.sx` to emit the decimal char code as the integer token value instead of the raw `$X` source text (which `parse-number` couldn't decode → nil). Plain `$c` uses `char->integer` of the first char; `$\C` consults the standard Erlang escape table (`\n=10 \t=9 \r=13 \s=32 \b=8 \e=27 \f=12 \v=11 \d=127 \0=0 \\=92 \"=34 \'=39`). End-of-file after `$` decodes to 0 defensively. Probes: `$A→65`, `$0→48`, `$\n→10`, `$\\→92`, `[$h,$i]` → cons of 104/105, `list_to_binary([$f,$e,$d])` → `<<102,101,100>>`. +12 eval tests (single chars, each escape, list/binary composition with previous BIFs). Combined with substrate fix #1, Erlang code in fed-sx-m1 can now write `[$h, $i | T]` patterns AND construct/deconstruct binaries — a full term-codec primitive set. Erlang conformance **750/750** (eval 385→397). Plan Blockers note updated; remaining `atom_to_list`/`integer_to_list` charlist gap noted as low-priority for Milestone 1. - **2026-06-04** — Step 3b substrate fix: registered `erlang:binary_to_list/1` and `erlang:list_to_binary/1` in `lib/erlang/runtime.sx` — the byte-level half of the term-codec gap. `binary_to_list` returns a proper Erlang charlist (`er-mk-cons` chain of byte ints). `list_to_binary` is iolist-aware via a recursive `er-iolist-walk!` that accepts nil / cons / binary / integer 0-255 and flattens nested iolists (e.g. `[1, <<2,3>>, [4, [5]]]` → `<<1,2,3,4,5>>`); out-of-range bytes or non-iolist elements raise `error:badarg`. Round-trip verified: `list_to_binary(binary_to_list(B)) =:= B`. +9 ffi tests (length, hd, empty→[], flat byte_size, nested-iolist, round-trip, 3 badarg paths). On-disk segment writer (3b) now has a complete `[Header | IoListPayload] → Binary` path; the remaining two substrate gaps (`atom_to_list`/`integer_to_list` as Erlang charlists, `$X` char-literal decoding) are still parked but no longer block 3b implementation if the encoding uses byte ints directly. Erlang conformance **738/738** (ffi 28→37). Plan Blockers note for Step 3b updated to reflect the partial resolution. - **2026-05-28** — Step 4f-consolidate: `bootstrap:start/3(ActorId, KeySpec, ActorState)` brings up the full kernel substrate in one call — starts the registry gen_server, populates it from the canonical genesis bundle (31 entries across 7 kinds), then starts nx_kernel. Returns the kernel Pid (gen_server convention in this port returns raw Pid not `{ok, Pid}`). Tests verify whereis(nx_kernel), per-kind counts (3/10/7/3/3/2/3), registry lookup of a known entry (`create`), publish + log_tip advance. `next/tests/bootstrap_start.sh` 10/10. Erlang conformance 729/729.