From a3811545079f5cda379779c6d900394a00435d92 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 1 May 2026 19:16:02 +0000 Subject: [PATCH] spec: bytevectors (make-bytevector/u8-ref/u8-set!/utf8->string/etc) Co-Authored-By: Claude Sonnet 4.6 --- hosts/javascript/platform.py | 68 +++++++++ hosts/ocaml/lib/sx_primitives.ml | 104 +++++++++++++- hosts/ocaml/lib/sx_types.ml | 7 +- spec/primitives.sx | 74 ++++++++++ spec/tests/test-bytevectors.sx | 236 +++++++++++++++++++++++++++++++ 5 files changed, 486 insertions(+), 3 deletions(-) create mode 100644 spec/tests/test-bytevectors.sx diff --git a/hosts/javascript/platform.py b/hosts/javascript/platform.py index dc8f1e63..3cef3514 100644 --- a/hosts/javascript/platform.py +++ b/hosts/javascript/platform.py @@ -1817,6 +1817,73 @@ PRIMITIVES_JS_MODULES: dict[str, str] = { }); return out; }; +''', + "stdlib.bytevectors": ''' + // stdlib.bytevectors — R7RS bytevector type backed by Uint8Array + function SxBytevector(size_or_buf) { + if (size_or_buf instanceof Uint8Array) { + this.data = size_or_buf; + } else { + this.data = new Uint8Array(typeof size_or_buf === "number" ? size_or_buf : 0); + } + this._bytevector = true; + } + SxBytevector.prototype._type = "bytevector"; + PRIMITIVES["make-bytevector"] = function(n, fill) { + var bv = new SxBytevector(n); + if (fill !== undefined) bv.data.fill(fill & 0xff); + return bv; + }; + PRIMITIVES["bytevector?"] = function(v) { return v instanceof SxBytevector; }; + PRIMITIVES["bytevector-length"] = function(bv) { return bv.data.length; }; + PRIMITIVES["bytevector-u8-ref"] = function(bv, i) { return bv.data[i]; }; + PRIMITIVES["bytevector-u8-set!"] = function(bv, i, byte) { bv.data[i] = byte & 0xff; return NIL; }; + PRIMITIVES["bytevector-copy"] = function(bv, start, end_) { + var s = start === undefined ? 0 : start; + var e = end_ === undefined ? bv.data.length : end_; + return new SxBytevector(bv.data.slice(s, e)); + }; + PRIMITIVES["bytevector-copy!"] = function(dst, at, src, start, end_) { + var s = start === undefined ? 0 : start; + var e = end_ === undefined ? src.data.length : end_; + dst.data.set(src.data.subarray(s, e), at); + return NIL; + }; + PRIMITIVES["bytevector-append"] = function() { + var total = 0; + for (var i = 0; i < arguments.length; i++) total += arguments[i].data.length; + var result = new Uint8Array(total); + var pos = 0; + for (var i = 0; i < arguments.length; i++) { + result.set(arguments[i].data, pos); + pos += arguments[i].data.length; + } + return new SxBytevector(result); + }; + PRIMITIVES["utf8->string"] = function(bv, start, end_) { + var s = start === undefined ? 0 : start; + var e = end_ === undefined ? bv.data.length : end_; + var dec = new TextDecoder("utf-8"); + return dec.decode(bv.data.subarray(s, e)); + }; + PRIMITIVES["string->utf8"] = function(str, start, end_) { + var enc = new TextEncoder(); + var full = enc.encode(str); + var s = start === undefined ? 0 : start; + var e = end_ === undefined ? full.length : end_; + return new SxBytevector(full.slice(s, e)); + }; + PRIMITIVES["bytevector->list"] = function(bv) { + var out = []; + for (var i = 0; i < bv.data.length; i++) out.push(bv.data[i]); + return out; + }; + PRIMITIVES["list->bytevector"] = function(lst) { + if (!Array.isArray(lst)) lst = []; + var b = new Uint8Array(lst.length); + for (var i = 0; i < lst.length; i++) b[i] = lst[i] & 0xff; + return new SxBytevector(b); + }; ''', } # Modules to include by default (all) @@ -1864,6 +1931,7 @@ PLATFORM_JS_PRE = ''' if (x._hash_table) return "hash-table"; if (x._sxset) return "set"; if (x._regexp) return "regexp"; + if (x._bytevector) return "bytevector"; if (x._rational) return "rational"; if (typeof Node !== "undefined" && x instanceof Node) return "dom-node"; if (Array.isArray(x)) return "list"; diff --git a/hosts/ocaml/lib/sx_primitives.ml b/hosts/ocaml/lib/sx_primitives.ml index b4fced92..142e15ec 100644 --- a/hosts/ocaml/lib/sx_primitives.ml +++ b/hosts/ocaml/lib/sx_primitives.ml @@ -2891,4 +2891,106 @@ let () = let r = !Sx_types._cek_call_ref fn (List [v]) in Hashtbl.replace out (set_key r) r) ht; SxSet out - | _ -> raise (Eval_error "set-map: expected set fn")) + | _ -> raise (Eval_error "set-map: expected set fn")); + (* === Bytevectors === *) + register "make-bytevector" (fun args -> + match args with + | [Integer n] -> SxBytevector (Bytes.make n '\000') + | [Integer n; Integer fill] -> + if fill < 0 || fill > 255 then raise (Eval_error "make-bytevector: fill must be 0-255"); + SxBytevector (Bytes.make n (Char.chr fill)) + | _ -> raise (Eval_error "make-bytevector: expected n [fill]")); + register "bytevector?" (fun args -> + match args with + | [SxBytevector _] -> Bool true + | [_] -> Bool false + | _ -> raise (Eval_error "bytevector?: 1 arg")); + register "bytevector-length" (fun args -> + match args with + | [SxBytevector b] -> Integer (Bytes.length b) + | _ -> raise (Eval_error "bytevector-length: expected bytevector")); + register "bytevector-u8-ref" (fun args -> + match args with + | [SxBytevector b; Integer i] -> + if i < 0 || i >= Bytes.length b then + raise (Eval_error (Printf.sprintf "bytevector-u8-ref: index %d out of range" i)); + Integer (Char.code (Bytes.get b i)) + | _ -> raise (Eval_error "bytevector-u8-ref: expected bytevector index")); + register "bytevector-u8-set!" (fun args -> + match args with + | [SxBytevector b; Integer i; Integer byte] -> + if i < 0 || i >= Bytes.length b then + raise (Eval_error (Printf.sprintf "bytevector-u8-set!: index %d out of range" i)); + if byte < 0 || byte > 255 then + raise (Eval_error "bytevector-u8-set!: byte must be 0-255"); + Bytes.set b i (Char.chr byte); Nil + | _ -> raise (Eval_error "bytevector-u8-set!: expected bytevector index byte")); + register "bytevector-copy" (fun args -> + match args with + | [SxBytevector b] -> SxBytevector (Bytes.copy b) + | [SxBytevector b; Integer start] -> + let len = Bytes.length b - start in + SxBytevector (Bytes.sub b start len) + | [SxBytevector b; Integer start; Integer stop] -> + SxBytevector (Bytes.sub b start (stop - start)) + | _ -> raise (Eval_error "bytevector-copy: expected bytevector [start [end]]")); + register "bytevector-copy!" (fun args -> + let do_copy dst at src start stop = + let len = stop - start in + Bytes.blit src start dst at len; Nil + in + match args with + | [SxBytevector dst; Integer at; SxBytevector src] -> + do_copy dst at src 0 (Bytes.length src) + | [SxBytevector dst; Integer at; SxBytevector src; Integer start] -> + do_copy dst at src start (Bytes.length src) + | [SxBytevector dst; Integer at; SxBytevector src; Integer start; Integer stop] -> + do_copy dst at src start stop + | _ -> raise (Eval_error "bytevector-copy!: expected dst at src [start [end]]")); + register "bytevector-append" (fun args -> + let bufs = List.map (function + | SxBytevector b -> b + | _ -> raise (Eval_error "bytevector-append: expected bytevectors")) args in + let total = List.fold_left (fun acc b -> acc + Bytes.length b) 0 bufs in + let result = Bytes.create total in + let pos = ref 0 in + List.iter (fun b -> + let len = Bytes.length b in + Bytes.blit b 0 result !pos len; + pos := !pos + len) bufs; + SxBytevector result); + register "utf8->string" (fun args -> + match args with + | [SxBytevector b] -> String (Bytes.to_string b) + | [SxBytevector b; Integer start] -> + String (Bytes.sub_string b start (Bytes.length b - start)) + | [SxBytevector b; Integer start; Integer stop] -> + String (Bytes.sub_string b start (stop - start)) + | _ -> raise (Eval_error "utf8->string: expected bytevector [start [end]]")); + register "string->utf8" (fun args -> + match args with + | [String s] -> SxBytevector (Bytes.of_string s) + | [String s; Integer start] -> + let len = String.length s - start in + SxBytevector (Bytes.of_string (String.sub s start len)) + | [String s; Integer start; Integer stop] -> + SxBytevector (Bytes.of_string (String.sub s start (stop - start))) + | _ -> raise (Eval_error "string->utf8: expected string [start [end]]")); + register "bytevector->list" (fun args -> + match args with + | [SxBytevector b] -> + let items = List.init (Bytes.length b) (fun i -> Integer (Char.code (Bytes.get b i))) in + List items + | _ -> raise (Eval_error "bytevector->list: expected bytevector")); + register "list->bytevector" (fun args -> + match args with + | [List items] | [ListRef { contents = items }] -> + let bytes_list = List.map (function + | Integer n when n >= 0 && n <= 255 -> Char.chr n + | Integer n -> raise (Eval_error (Printf.sprintf "list->bytevector: byte %d out of range" n)) + | v -> raise (Eval_error ("list->bytevector: expected integer, got " ^ Sx_types.type_of v))) items in + let b = Bytes.create (List.length bytes_list) in + List.iteri (fun i c -> Bytes.set b i c) bytes_list; + SxBytevector b + | [Nil] -> SxBytevector (Bytes.create 0) + | _ -> raise (Eval_error "list->bytevector: expected list")) diff --git a/hosts/ocaml/lib/sx_types.ml b/hosts/ocaml/lib/sx_types.ml index cb1360b3..490ce093 100644 --- a/hosts/ocaml/lib/sx_types.ml +++ b/hosts/ocaml/lib/sx_types.ml @@ -81,6 +81,7 @@ and value = | Rational of int * int (** Exact rational: numerator, denominator (reduced, denom>0). *) | SxSet of (string, value) Hashtbl.t (** Mutable set keyed by inspect(value). *) | SxRegexp of string * string * Re.re (** Regexp: source, flags, compiled. *) + | SxBytevector of bytes (** Mutable bytevector — R7RS bytevector type. *) (** String input port: source string + mutable cursor position. *) and sx_port_kind = @@ -516,8 +517,9 @@ let type_of = function | Port { sp_kind = PortInput _; _ } -> "input-port" | Port { sp_kind = PortOutput _; _ } -> "output-port" | Rational _ -> "rational" - | SxSet _ -> "set" - | SxRegexp _ -> "regexp" + | SxSet _ -> "set" + | SxRegexp _ -> "regexp" + | SxBytevector _ -> "bytevector" let is_nil = function Nil -> true | _ -> false let is_lambda = function Lambda _ -> true | _ -> false @@ -882,3 +884,4 @@ let rec inspect = function | Rational (n, d) -> Printf.sprintf "%d/%d" n d | SxSet ht -> Printf.sprintf "" (Hashtbl.length ht) | SxRegexp (src, flags, _) -> Printf.sprintf "#/%s/%s" src flags + | SxBytevector b -> Printf.sprintf "#u8(%s)" (String.concat " " (List.init (Bytes.length b) (fun i -> string_of_int (Char.code (Bytes.get b i))))) diff --git a/spec/primitives.sx b/spec/primitives.sx index 59306a18..9aee9be7 100644 --- a/spec/primitives.sx +++ b/spec/primitives.sx @@ -1252,3 +1252,77 @@ :params ((re :as regexp) (str :as string)) :returns "list" :doc "Split str on every match of re; returns list of strings.") + +(define-module :stdlib.bytevectors) + +(define-primitive + "make-bytevector" + :params (n &rest fill) + :returns "bytevector" + :doc "Create a bytevector of n bytes, all initialised to fill (default 0).") + +(define-primitive + "bytevector?" + :params (v) + :returns "boolean" + :doc "True if v is a bytevector.") + +(define-primitive + "bytevector-length" + :params ((bv :as bytevector)) + :returns "number" + :doc "Number of bytes in bv.") + +(define-primitive + "bytevector-u8-ref" + :params ((bv :as bytevector) (i :as number)) + :returns "number" + :doc "Byte value 0-255 at index i.") + +(define-primitive + "bytevector-u8-set!" + :params ((bv :as bytevector) (i :as number) (byte :as number)) + :returns "nil" + :doc "Set byte at index i to byte 0-255. Mutates bv.") + +(define-primitive + "bytevector-copy" + :params ((bv :as bytevector) &rest bounds) + :returns "bytevector" + :doc "Fresh copy of bv, optionally sliced to [start, end).") + +(define-primitive + "bytevector-copy!" + :params ((dst :as bytevector) (at :as number) (src :as bytevector) &rest bounds) + :returns "nil" + :doc "Copy bytes from src[start..end) into dst starting at at. Mutates dst.") + +(define-primitive + "bytevector-append" + :params (&rest bvs) + :returns "bytevector" + :doc "Concatenate bytevectors into a new bytevector.") + +(define-primitive + "utf8->string" + :params ((bv :as bytevector) &rest bounds) + :returns "string" + :doc "Decode bv[start..end) as UTF-8 and return the string.") + +(define-primitive + "string->utf8" + :params ((s :as string) &rest bounds) + :returns "bytevector" + :doc "Encode s[start..end) as UTF-8 and return a bytevector.") + +(define-primitive + "bytevector->list" + :params ((bv :as bytevector)) + :returns "list" + :doc "Convert bytevector to a list of byte integers.") + +(define-primitive + "list->bytevector" + :params ((lst :as list)) + :returns "bytevector" + :doc "Build a bytevector from a list of byte integers 0-255.") diff --git a/spec/tests/test-bytevectors.sx b/spec/tests/test-bytevectors.sx new file mode 100644 index 00000000..6b24a072 --- /dev/null +++ b/spec/tests/test-bytevectors.sx @@ -0,0 +1,236 @@ +;; ========================================================================== +;; test-bytevectors.sx — Tests for bytevector primitives +;; ========================================================================== + +;; -------------------------------------------------------------------------- +;; make-bytevector / bytevector? +;; -------------------------------------------------------------------------- + +(defsuite + "bv:create" + (deftest + "make-bytevector returns bytevector" + (assert (bytevector? (make-bytevector 4)))) + (deftest + "make-bytevector zeroes by default" + (let + ((bv (make-bytevector 3))) + (assert + (and + (= (bytevector-u8-ref bv 0) 0) + (= (bytevector-u8-ref bv 1) 0) + (= (bytevector-u8-ref bv 2) 0))))) + (deftest + "make-bytevector with fill" + (let + ((bv (make-bytevector 3 42))) + (assert + (and + (= (bytevector-u8-ref bv 0) 42) + (= (bytevector-u8-ref bv 1) 42) + (= (bytevector-u8-ref bv 2) 42))))) + (deftest + "make-bytevector length 0" + (assert= (bytevector-length (make-bytevector 0)) 0)) + (deftest + "bytevector? true for bytevector" + (assert (bytevector? (make-bytevector 2)))) + (deftest + "bytevector? false for string" + (assert (not (bytevector? "hello")))) + (deftest "bytevector? false for nil" (assert (not (bytevector? nil)))) + (deftest + "bytevector? false for list" + (assert (not (bytevector? (list 1 2 3)))))) + +;; -------------------------------------------------------------------------- +;; bytevector-length / u8-ref / u8-set! +;; -------------------------------------------------------------------------- + +(defsuite + "bv:access" + (deftest + "bytevector-length" + (assert= (bytevector-length (make-bytevector 5)) 5)) + (deftest + "u8-ref reads fill byte" + (assert= + (bytevector-u8-ref (make-bytevector 4 99) 2) + 99)) + (deftest + "u8-set! mutates" + (let + ((bv (make-bytevector 3 0))) + (bytevector-u8-set! bv 1 200) + (assert= (bytevector-u8-ref bv 1) 200))) + (deftest + "u8-set! boundary byte 255" + (let + ((bv (make-bytevector 1 0))) + (bytevector-u8-set! bv 0 255) + (assert= (bytevector-u8-ref bv 0) 255))) + (deftest + "u8-set! byte 0" + (let + ((bv (make-bytevector 1 255))) + (bytevector-u8-set! bv 0 0) + (assert= (bytevector-u8-ref bv 0) 0)))) + +;; -------------------------------------------------------------------------- +;; bytevector-copy +;; -------------------------------------------------------------------------- + +(defsuite + "bv:copy" + (deftest + "copy produces equal content" + (let + ((bv (make-bytevector 3 7))) + (let + ((bv2 (bytevector-copy bv))) + (assert + (and + (= (bytevector-u8-ref bv2 0) 7) + (= (bytevector-u8-ref bv2 1) 7) + (= (bytevector-u8-ref bv2 2) 7)))))) + (deftest + "copy is independent" + (let + ((bv (make-bytevector 2 0))) + (let + ((bv2 (bytevector-copy bv))) + (bytevector-u8-set! bv2 0 99) + (assert= (bytevector-u8-ref bv 0) 0)))) + (deftest + "copy with start" + (let + ((bv (list->bytevector (list 10 20 30 40)))) + (let + ((bv2 (bytevector-copy bv 2))) + (assert + (and + (= (bytevector-length bv2) 2) + (= (bytevector-u8-ref bv2 0) 30)))))) + (deftest + "copy with start and end" + (let + ((bv (list->bytevector (list 10 20 30 40)))) + (let + ((bv2 (bytevector-copy bv 1 3))) + (assert + (and + (= (bytevector-length bv2) 2) + (= (bytevector-u8-ref bv2 0) 20) + (= (bytevector-u8-ref bv2 1) 30))))))) + +;; -------------------------------------------------------------------------- +;; bytevector-copy! +;; -------------------------------------------------------------------------- + +(defsuite + "bv:copy-bang" + (deftest + "copy! overwrites dst region" + (let + ((dst (make-bytevector 4 0))) + (let + ((src (list->bytevector (list 1 2 3)))) + (bytevector-copy! dst 1 src) + (assert + (and + (= (bytevector-u8-ref dst 0) 0) + (= (bytevector-u8-ref dst 1) 1) + (= (bytevector-u8-ref dst 2) 2) + (= (bytevector-u8-ref dst 3) 3)))))) + (deftest + "copy! with src bounds" + (let + ((dst (make-bytevector 2 0))) + (let + ((src (list->bytevector (list 10 20 30 40)))) + (bytevector-copy! dst 0 src 1 3) + (assert + (and + (= (bytevector-u8-ref dst 0) 20) + (= (bytevector-u8-ref dst 1) 30))))))) + +;; -------------------------------------------------------------------------- +;; bytevector-append +;; -------------------------------------------------------------------------- + +(defsuite + "bv:append" + (deftest + "append two bytevectors" + (let + ((bv (bytevector-append (list->bytevector (list 1 2)) (list->bytevector (list 3 4))))) + (assert + (and + (= (bytevector-length bv) 4) + (= (bytevector-u8-ref bv 0) 1) + (= (bytevector-u8-ref bv 3) 4))))) + (deftest + "append three bytevectors" + (let + ((bv (bytevector-append (list->bytevector (list 1)) (list->bytevector (list 2)) (list->bytevector (list 3))))) + (assert= (bytevector-length bv) 3))) + (deftest + "append empty" + (assert= + (bytevector-length + (bytevector-append + (make-bytevector 0) + (make-bytevector 0))) + 0))) + +;; -------------------------------------------------------------------------- +;; list->bytevector / bytevector->list +;; -------------------------------------------------------------------------- + +(defsuite + "bv:conversion" + (deftest + "list->bytevector roundtrip" + (let + ((lst (list 10 20 30))) + (assert= (bytevector->list (list->bytevector lst)) lst))) + (deftest + "list->bytevector empty" + (assert= (bytevector-length (list->bytevector nil)) 0)) + (deftest + "bytevector->list from make-bytevector" + (let + ((lst (bytevector->list (make-bytevector 3 5)))) + (assert= lst (list 5 5 5))))) + +;; -------------------------------------------------------------------------- +;; utf8 roundtrip +;; -------------------------------------------------------------------------- + +(defsuite + "bv:utf8" + (deftest + "string->utf8 returns bytevector" + (assert (bytevector? (string->utf8 "hello")))) + (deftest + "string->utf8 length" + (assert= (bytevector-length (string->utf8 "abc")) 3)) + (deftest + "utf8->string roundtrip" + (assert= (utf8->string (string->utf8 "hello")) "hello")) + (deftest + "utf8->string with slice" + (let + ((bv (string->utf8 "hello"))) + (assert= (utf8->string bv 1 4) "ell"))) + (deftest + "string->utf8 with start" + (assert= (utf8->string (string->utf8 "hello" 2)) "llo")) + (deftest + "string->utf8 with start and end" + (assert= + (utf8->string (string->utf8 "hello" 1 4)) + "ell")) + (deftest + "empty string round-trips" + (assert= (utf8->string (string->utf8 "")) "")))