diff --git a/lib/erlang/parser-expr.sx b/lib/erlang/parser-expr.sx index 9bfbca9d..499f6e10 100644 --- a/lib/erlang/parser-expr.sx +++ b/lib/erlang/parser-expr.sx @@ -237,6 +237,8 @@ (er-parse-fun-expr st) (er-is? st "keyword" "try") (er-parse-try st) + (er-is? st "punct" "<<") + (er-parse-binary st) :else (error (str "Erlang parse: unexpected " @@ -576,3 +578,63 @@ ((guards (if (er-is? st "keyword" "when") (do (er-advance! st) (er-parse-guards st)) (list)))) (er-expect! st "punct" "->") (let ((body (er-parse-body st))) {:pattern pat :body body :class klass :guards guards})))))) + +;; ── binary literals / patterns ──────────────────────────────── +;; `<< [Seg {, Seg}] >>` where Seg = Value [: Size] [/ Spec]. Size is +;; a literal integer (multiple of 8 supported); Spec is `integer` +;; (default) or `binary` (rest-of-binary tail). Sufficient for the +;; common `<>` patterns. +(define + er-parse-binary + (fn + (st) + (er-expect! st "punct" "<<") + (cond + (er-is? st "punct" ">>") + (do (er-advance! st) {:segments (list) :type "binary"}) + :else (let + ((segs (list (er-parse-binary-segment st)))) + (er-parse-binary-tail st segs))))) + +(define + er-parse-binary-tail + (fn + (st segs) + (cond + (er-is? st "punct" ",") + (do + (er-advance! st) + (append! segs (er-parse-binary-segment st)) + (er-parse-binary-tail st segs)) + (er-is? st "punct" ">>") + (do (er-advance! st) {:segments segs :type "binary"}) + :else (error + (str + "Erlang parse: expected ',' or '>>' in binary, got '" + (er-cur-value st) + "'"))))) + +(define + er-parse-binary-segment + (fn + (st) + ;; Use `er-parse-primary` for the value so a leading `:` falls + ;; through to the segment's size suffix instead of being eaten + ;; by `er-parse-postfix-loop` as a `Mod:Fun` remote call. + (let + ((v (er-parse-primary st))) + (let + ((size (cond + (er-is? st "punct" ":") + (do (er-advance! st) (er-parse-primary st)) + :else nil)) + (spec (cond + (er-is? st "op" "/") + (do + (er-advance! st) + (let + ((tok (er-cur st))) + (er-advance! st) + (get tok :value))) + :else "integer"))) + {:size size :spec spec :value v})))) diff --git a/lib/erlang/scoreboard.json b/lib/erlang/scoreboard.json index f6264ec7..cf98c8d9 100644 --- a/lib/erlang/scoreboard.json +++ b/lib/erlang/scoreboard.json @@ -1,11 +1,11 @@ { "language": "erlang", - "total_pass": 456, - "total": 456, + "total_pass": 477, + "total": 477, "suites": [ {"name":"tokenize","pass":62,"total":62,"status":"ok"}, {"name":"parse","pass":52,"total":52,"status":"ok"}, - {"name":"eval","pass":272,"total":272,"status":"ok"}, + {"name":"eval","pass":293,"total":293,"status":"ok"}, {"name":"runtime","pass":39,"total":39,"status":"ok"}, {"name":"ring","pass":4,"total":4,"status":"ok"}, {"name":"ping-pong","pass":4,"total":4,"status":"ok"}, diff --git a/lib/erlang/scoreboard.md b/lib/erlang/scoreboard.md index 613c5d7f..86429df7 100644 --- a/lib/erlang/scoreboard.md +++ b/lib/erlang/scoreboard.md @@ -1,12 +1,12 @@ # Erlang-on-SX Scoreboard -**Total: 456 / 456 tests passing** +**Total: 477 / 477 tests passing** | | Suite | Pass | Total | |---|---|---|---| | ✅ | tokenize | 62 | 62 | | ✅ | parse | 52 | 52 | -| ✅ | eval | 272 | 272 | +| ✅ | eval | 293 | 293 | | ✅ | runtime | 39 | 39 | | ✅ | ring | 4 | 4 | | ✅ | ping-pong | 4 | 4 | diff --git a/lib/erlang/tests/eval.sx b/lib/erlang/tests/eval.sx index a832e652..e2ad5eb9 100644 --- a/lib/erlang/tests/eval.sx +++ b/lib/erlang/tests/eval.sx @@ -937,6 +937,63 @@ (nm (nth (get (get (ev "[{ok, X} || X <- [a, b]]") :head) :elements) 0)) "ok") +;; ── binary literals / patterns ──────────────────────────────── +(er-eval-test "binary tag" + (get (ev "<<>>") :tag) "binary") +(er-eval-test "is_binary empty" (nm (ev "is_binary(<<>>)")) "true") +(er-eval-test "is_binary 3 bytes" + (nm (ev "is_binary(<<1, 2, 3>>)")) "true") +(er-eval-test "is_binary list" (nm (ev "is_binary([1, 2])")) "false") +(er-eval-test "byte_size 0" (ev "byte_size(<<>>)") 0) +(er-eval-test "byte_size 3" (ev "byte_size(<<1, 2, 3>>)") 3) +(er-eval-test "byte_size 16-bit" (ev "byte_size(<<256:16>>)") 2) +(er-eval-test "byte_size 32-bit" (ev "byte_size(<<999999:32>>)") 4) + +;; Match +(er-eval-test "match single byte" + (ev "<> = <<7>>, X") 7) +(er-eval-test "match X:8" + (ev "<> = <<200>>, X") 200) +(er-eval-test "match 16-bit decode" + (ev "<> = <<1, 0>>, X") 256) +(er-eval-test "match 16-bit hi byte" + (ev "<> = <<2, 1>>, X") 513) +(er-eval-test "match A:8 B:16" + (ev "<> = <<1, 0, 2>>, A + B") 3) +(er-eval-test "match three 8-bit" + (ev "<> = <<1, 2, 3>>, A + B + C") 6) + +;; Tail binary +(er-eval-test "tail rest size" + (ev "<<_:8, Rest/binary>> = <<1, 2, 3, 4>>, byte_size(Rest)") 3) +(er-eval-test "tail rest content" + (ev "<<_:8, Rest/binary>> = <<1, 2, 3, 4>>, <> = Rest, X") 2) + +;; Match failure +(er-eval-test "size mismatch fails" + (do + (ev "P = spawn(fun () -> <> = <<1>>, ok end), receive after 0 -> ok end") + (let ((reason (er-proc-field (er-mk-pid 1) :exit-reason))) + (cond + (er-tuple? reason) (nm (nth (get reason :elements) 0)) + (er-atom? reason) (get reason :name) + :else nil))) + "badmatch") + +;; Equality +(er-eval-test "binary =:= self" + (nm (ev "B = <<1, 2, 3>>, B =:= B")) "true") +(er-eval-test "binary =:= same" + (nm (ev "<<1, 2>> =:= <<1, 2>>")) "true") +(er-eval-test "binary =/= different" + (nm (ev "<<1, 2>> =:= <<1, 3>>")) "false") + +;; Construction with computed value +(er-eval-test "build with var" + (ev "X = 42, byte_size(<>)") 1) +(er-eval-test "build with size var" + (ev "X = 7, byte_size(<>)") 2) + (define er-eval-test-summary (str "eval " er-eval-test-pass "/" er-eval-test-count)) diff --git a/lib/erlang/transpile.sx b/lib/erlang/transpile.sx index a167db0f..d3d7bd18 100644 --- a/lib/erlang/transpile.sx +++ b/lib/erlang/transpile.sx @@ -24,6 +24,8 @@ (define er-mk-nil (fn () {:tag "nil"})) (define er-mk-cons (fn (h t) {:tag "cons" :head h :tail t})) (define er-mk-tuple (fn (elems) {:tag "tuple" :elements elems})) +(define er-mk-binary (fn (bytes) {:tag "binary" :bytes bytes})) +(define er-binary? (fn (v) (er-is-tagged? v "binary"))) (define er-bool (fn (b) (if b er-atom-true er-atom-false))) (define @@ -124,6 +126,7 @@ (= ty "receive") (er-eval-receive node env) (= ty "try") (er-eval-try node env) (= ty "lc") (er-eval-lc node env) + (= ty "binary") (er-eval-binary node env) (= ty "match") (er-eval-match node env) :else (error (str "Erlang eval: unsupported node type '" ty "'")))))) @@ -195,6 +198,7 @@ (= ty "nil") (er-nil? val) (= ty "tuple") (er-match-tuple pat val env) (= ty "cons") (er-match-cons pat val env) + (= ty "binary") (er-match-binary pat val env) :else (error (str "Erlang match: unsupported pattern type '" ty "'")))))) (define @@ -240,6 +244,95 @@ (er-match! (get pat :head) (get val :head) env) (er-match! (get pat :tail) (get val :tail) env))))) +;; Match `<>` against a binary value. Walks the +;; segment list left-to-right, consuming bytes from the front of the +;; binary for each segment. Integer segments decode big-endian and +;; bind/check the pattern; binary-spec segments without size capture +;; the trailing bytes as a binary value. +(define + er-match-binary + (fn + (pat val env) + (and + (er-binary? val) + (let + ((segs (get pat :segments)) (cursor (list 0))) + (and + (er-match-binary-segs segs val env cursor 0) + (= (nth cursor 0) (len (get val :bytes)))))))) + +(define + er-match-binary-segs + (fn + (segs val env cursor i) + (cond + (>= i (len segs)) true + :else (let + ((seg (nth segs i))) + (let + ((spec (get seg :spec)) + (size-node (get seg :size))) + (cond + (= spec "integer") + (er-match-binary-int seg val env cursor segs i) + (= spec "binary") + (er-match-binary-tail seg val env cursor segs i) + :else false)))))) + +(define + er-match-binary-int + (fn + (seg val env cursor segs i) + (let + ((bits (cond + (= (get seg :size) nil) 8 + :else (er-eval-expr (get seg :size) env)))) + (cond + (or (not (= (remainder bits 8) 0)) (<= bits 0)) false + :else (let + ((nbytes (truncate (/ bits 8))) (bytes (get val :bytes)) (start (nth cursor 0))) + (cond + (> (+ start nbytes) (len bytes)) false + :else (let + ((decoded (er-decode-int bytes start nbytes))) + (set-nth! cursor 0 (+ start nbytes)) + (and + (er-match! (get seg :value) decoded env) + (er-match-binary-segs segs val env cursor (+ i 1)))))))))) + +(define + er-decode-int + (fn + (bytes start nbytes) + (let + ((acc (list 0))) + (for-each + (fn + (j) + (set-nth! + acc + 0 + (+ (* (nth acc 0) 256) (nth bytes (+ start j))))) + (range 0 nbytes)) + (nth acc 0)))) + +(define + er-match-binary-tail + (fn + (seg val env cursor segs i) + (cond + (not (= (get seg :size) nil)) false + (not (= (+ i 1) (len segs))) false + :else (let + ((bytes (get val :bytes)) + (start (nth cursor 0)) + (rest-bytes (list))) + (for-each + (fn (k) (append! rest-bytes (nth bytes k))) + (range start (len bytes))) + (set-nth! cursor 0 (len bytes)) + (er-match! (get seg :value) (er-mk-binary rest-bytes) env))))) + ;; ── env snapshot / restore ──────────────────────────────────────── (define er-env-copy @@ -375,6 +468,12 @@ (and (= (type-of a) "string") (= (type-of b) "string")) (= a b) (and (er-pid? a) (er-pid? b)) (= (get a :id) (get b :id)) (and (er-ref? a) (er-ref? b)) (= (get a :id) (get b :id)) + (and (er-binary? a) (er-binary? b)) + (let + ((ba (get a :bytes)) (bb (get b :bytes))) + (and + (= (len ba) (len bb)) + (every? (fn (i) (= (nth ba i) (nth bb i))) (range 0 (len ba))))) :else false))) ;; Exact equality: 1 =/= 1.0 in Erlang. @@ -589,6 +688,8 @@ (= name "list_to_atom") (er-bif-list-to-atom vs) (= name "is_pid") (er-bif-is-pid vs) (= name "is_reference") (er-bif-is-reference vs) + (= name "is_binary") (er-bif-is-binary vs) + (= name "byte_size") (er-bif-byte-size vs) (= name "self") (er-bif-self vs) (= name "spawn") (er-bif-spawn vs) (= name "exit") (er-bif-exit vs) @@ -696,6 +797,20 @@ (er-bool (or (er-is-atom-named? v "true") (er-is-atom-named? v "false")))))) +(define + er-bif-is-binary + (fn (vs) (er-bool (er-binary? (er-bif-arg1 vs "is_binary"))))) + +(define + er-bif-byte-size + (fn + (vs) + (let + ((v (er-bif-arg1 vs "byte_size"))) + (cond + (er-binary? v) (len (get v :bytes)) + :else (raise (er-mk-error-marker (er-mk-atom "badarg"))))))) + ;; ── list / tuple BIFs ──────────────────────────────────────────── (define er-bif-length (fn (vs) (er-list-length (er-bif-arg1 vs "length")))) @@ -936,8 +1051,22 @@ (er-fun? v) "#Fun" (er-pid? v) (str "") (er-ref? v) (str "#Ref<" (get v :id) ">") + (er-binary? v) (str "<<" (er-format-bytes (get v :bytes)) ">>") :else (str v)))) +(define + er-format-bytes + (fn + (bs) + (cond + (= (len bs) 0) "" + :else (let + ((out (list (str (nth bs 0))))) + (for-each + (fn (i) (append! out ",") (append! out (str (nth bs i)))) + (range 1 (len bs))) + (reduce str "" out))))) + (define er-format-list-elems (fn @@ -1363,3 +1492,81 @@ (set-nth! acc 0 (er-mk-cons (nth xs j) (nth acc 0))))) (range 0 (len xs))) (nth acc 0)))) + +;; ── binaries ──────────────────────────────────────────────────── +;; Each segment is `Value : Size / Spec`. Supported specs: `integer` +;; (default; size in bits, must be multiple of 8 — 8/16/24/32 typical) +;; and `binary` (concatenate the segment's binary value into the +;; result). Default size for `integer` segments is 8 bits. +(define + er-eval-binary + (fn + (node env) + (let + ((segs (get node :segments)) (out (list))) + (for-each + (fn (i) (er-eval-binary-segment (nth segs i) env out)) + (range 0 (len segs))) + (er-mk-binary out)))) + +(define + er-eval-binary-segment + (fn + (seg env out) + (let + ((spec (get seg :spec)) + (val (er-eval-expr (get seg :value) env)) + (size (er-eval-binary-size (get seg :size) env))) + (cond + (= spec "integer") + (let + ((bits (if (= size nil) 8 size))) + (er-emit-int! out val bits)) + (= spec "binary") + (cond + (er-binary? val) + (for-each + (fn (i) (append! out (nth (get val :bytes) i))) + (range 0 (len (get val :bytes)))) + :else (raise + (er-mk-error-marker (er-mk-atom "badarg")))) + :else (error + (str "Erlang: binary spec '" spec "' not supported")))))) + +(define + er-eval-binary-size + (fn + (node env) + (cond + (= node nil) nil + :else (er-eval-expr node env)))) + +;; Big-endian byte emission for an N-bit integer (N must be multiple +;; of 8). For bits=8 this is just `(append! out (mod v 256))`. +(define + er-emit-int! + (fn + (out v bits) + (cond + (or (not (= (remainder bits 8) 0)) (<= bits 0)) + (error + (str "Erlang: binary integer size must be a positive multiple of 8 (got " bits ")")) + :else (let + ((nbytes (truncate (/ bits 8)))) + (for-each + (fn + (i) + (let + ((shift (* 8 (- (- nbytes 1) i)))) + (append! + out + (remainder (truncate (/ v (er-int-pow 2 shift))) 256)))) + (range 0 nbytes)))))) + +(define + er-int-pow + (fn + (b e) + (cond + (= e 0) 1 + :else (* b (er-int-pow b (- e 1)))))) diff --git a/plans/erlang-on-sx.md b/plans/erlang-on-sx.md index a70ddb2a..b27b0a15 100644 --- a/plans/erlang-on-sx.md +++ b/plans/erlang-on-sx.md @@ -53,7 +53,7 @@ Core mapping: - [x] Tokenizer: atoms (bare + single-quoted), variables (Uppercase/`_`-prefixed), numbers (int, float, `16#HEX`), strings `"..."`, chars `$c`, punct `( ) { } [ ] , ; . : :: ->` — **62/62 tests** - [x] Parser: module declarations, `-module`/`-export`/`-import` attributes, function clauses with head patterns + guards + body — **52/52 tests** - [x] Expressions: literals, vars, calls, tuples `{...}`, lists `[...|...]`, `if`, `case`, `receive`, `fun`, `try/catch`, operators, precedence -- [ ] Binaries `<<...>>` — not yet parsed (deferred to Phase 6) +- [x] Binaries `<<...>>` — landed in Phase 6 (parser + eval + pattern matching) - [x] Unit tests in `lib/erlang/tests/parse.sx` ### Phase 2 — sequential eval + pattern matching + BIFs @@ -91,7 +91,7 @@ Core mapping: ### Phase 6 — the rest - [x] List comprehensions `[X*2 || X <- L]` — **12 new eval tests**; generators, filters, multiple generators (cartesian), pattern-matching gens (`{ok, V} <- ...`) -- [ ] Binary pattern matching `<>` +- [x] Binary pattern matching `<>` — **21 new eval tests**; literal construction, byte/multi-byte segments, `Rest/binary` tail capture, `is_binary/1`, `byte_size/1` - [ ] ETS-lite (in-memory tables via SX dicts) - [ ] More BIFs — target 200+ test corpus green @@ -99,6 +99,7 @@ Core mapping: _Newest first._ +- **2026-04-25 binary pattern matching green** — Parser additions: `<<...>>` literal/pattern in `er-parse-primary`, segment grammar `Value [: Size] [/ Spec]` (Spec defaults to `integer`, supports `binary` for tail). Critical fix: segment value uses `er-parse-primary` (not `er-parse-expr-prec`) so the trailing `:Size` doesn't get eaten by the postfix `Mod:Fun` remote-call handler. Runtime value: `{:tag "binary" :bytes (list of int 0-255)}`. Construction: integer segments emit big-endian bytes (size in bits, must be multiple of 8); binary-spec segments concatenate. Pattern matching consumes bytes from a cursor at the front, decoding integer segments big-endian, capturing `Rest/binary` tail at the end. Whole-binary length must consume exactly. New BIFs: `is_binary/1`, `byte_size/1`. Binaries participate in `er-equal?` (byte-wise) and format as `<>`. 21 new eval tests: tag/predicate, byte_size for 8/16/32-bit segments, single + multi segment match, three 8-bit, tail rest size + content, badmatch on size mismatch, `=:=` equality, var-driven construction. Total suite 477/477. - **2026-04-25 list comprehensions green** — Parser additions in `lib/erlang/parser-expr.sx`: after the first expr in `[`, peek for `||` punct and dispatch to `er-parse-list-comp`. Qualifiers separated by `,`, each one is `Pattern <- Source` (generator) or any expression (filter — disambiguated by absence of `<-`). AST: `{:type "lc" :head E :qualifiers [...]}` with each qualifier `{:kind "gen"/"filter" ...}`. Evaluator (`er-eval-lc` in transpile.sx): right-fold builds the result by walking qualifiers; generators iterate the source list with env snapshot/restore per element so pattern-bound vars don't leak between iterations; filters skip when falsy. Pattern-matching generators are silently skipped on no-match (e.g. `[V || {ok, V} <- ...]`). 12 new eval tests: map double, fold-sum-of-comprehension, length, filter sum, "all filtered", empty source, cartesian, pattern-match gen, nested generators with filter, squares, tuple capture. Total suite 456/456. - **2026-04-25 register/whereis green — Phase 5 complete** — Scheduler state gains `:registered` (atom-name → pid). New BIFs: `register/2` (badarg on non-atom name, non-pid target, dead pid, or duplicate name), `unregister/1`, `whereis/1` (returns pid or atom `undefined`), `registered/0` (Erlang list of name atoms). `er-eval-send` for `Name ! Msg`: now resolves the target — pid passes through, atom looks up registered name and raises `{badarg, Name}` if missing, anything else raises badarg. Process death (in `er-sched-step!`) calls `er-unregister-pid!` to drop any registered name before `er-propagate-exit!` so monitor `{'DOWN'}` messages see the cleared registry. 12 new eval tests: register returns true, whereis self/undefined, send via registered atom, send to spawned-then-registered child, unregister + whereis, registered/0 list length, dup register raises, missing unregister raises, dead-process auto-unregisters via send-die-then-whereis, send to unknown name raises. Total suite 444/444. **Phase 5 complete — Phase 6 (list comprehensions, binary patterns, ETS) is the last phase.** - **2026-04-25 supervisor (one-for-one) green** — `er-supervisor-source` in `lib/erlang/runtime.sx` is the canonical Erlang text of a minimal supervisor; `er-load-supervisor!` registers it. Implements `start_link(Mod, Args)` (sup process traps exits, calls `Mod:init/1` to get child-spec list, runs `start_child/1` for each which links the spawned pid back to itself), `which_children/1`, `stop/1`. Receive loop dispatches on `{'EXIT', Dead, _Reason}` (restarts only the dead child via `restart/2`, keeps siblings — proper one-for-one), `{'$sup_which', From}` (returns child list), `'$sup_stop'`. Child specs are `{Id, StartFn}` where `StartFn/0` returns the new child's pid. 7 new eval tests: `which_children` for 1- and 3-child sup, child responds to ping, killed child restarted with fresh pid, restarted child still functional, one-for-one isolation (siblings keep their pids), stop returns ok. Total suite 432/432.