spec: bytevectors (make-bytevector/u8-ref/u8-set!/utf8->string/etc)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-01 19:16:02 +00:00
parent 24e1a862fb
commit a381154507
5 changed files with 486 additions and 3 deletions

View File

@@ -1817,6 +1817,73 @@ PRIMITIVES_JS_MODULES: dict[str, str] = {
});
return out;
};
''',
"stdlib.bytevectors": '''
// stdlib.bytevectors — R7RS bytevector type backed by Uint8Array
function SxBytevector(size_or_buf) {
if (size_or_buf instanceof Uint8Array) {
this.data = size_or_buf;
} else {
this.data = new Uint8Array(typeof size_or_buf === "number" ? size_or_buf : 0);
}
this._bytevector = true;
}
SxBytevector.prototype._type = "bytevector";
PRIMITIVES["make-bytevector"] = function(n, fill) {
var bv = new SxBytevector(n);
if (fill !== undefined) bv.data.fill(fill & 0xff);
return bv;
};
PRIMITIVES["bytevector?"] = function(v) { return v instanceof SxBytevector; };
PRIMITIVES["bytevector-length"] = function(bv) { return bv.data.length; };
PRIMITIVES["bytevector-u8-ref"] = function(bv, i) { return bv.data[i]; };
PRIMITIVES["bytevector-u8-set!"] = function(bv, i, byte) { bv.data[i] = byte & 0xff; return NIL; };
PRIMITIVES["bytevector-copy"] = function(bv, start, end_) {
var s = start === undefined ? 0 : start;
var e = end_ === undefined ? bv.data.length : end_;
return new SxBytevector(bv.data.slice(s, e));
};
PRIMITIVES["bytevector-copy!"] = function(dst, at, src, start, end_) {
var s = start === undefined ? 0 : start;
var e = end_ === undefined ? src.data.length : end_;
dst.data.set(src.data.subarray(s, e), at);
return NIL;
};
PRIMITIVES["bytevector-append"] = function() {
var total = 0;
for (var i = 0; i < arguments.length; i++) total += arguments[i].data.length;
var result = new Uint8Array(total);
var pos = 0;
for (var i = 0; i < arguments.length; i++) {
result.set(arguments[i].data, pos);
pos += arguments[i].data.length;
}
return new SxBytevector(result);
};
PRIMITIVES["utf8->string"] = function(bv, start, end_) {
var s = start === undefined ? 0 : start;
var e = end_ === undefined ? bv.data.length : end_;
var dec = new TextDecoder("utf-8");
return dec.decode(bv.data.subarray(s, e));
};
PRIMITIVES["string->utf8"] = function(str, start, end_) {
var enc = new TextEncoder();
var full = enc.encode(str);
var s = start === undefined ? 0 : start;
var e = end_ === undefined ? full.length : end_;
return new SxBytevector(full.slice(s, e));
};
PRIMITIVES["bytevector->list"] = function(bv) {
var out = [];
for (var i = 0; i < bv.data.length; i++) out.push(bv.data[i]);
return out;
};
PRIMITIVES["list->bytevector"] = function(lst) {
if (!Array.isArray(lst)) lst = [];
var b = new Uint8Array(lst.length);
for (var i = 0; i < lst.length; i++) b[i] = lst[i] & 0xff;
return new SxBytevector(b);
};
''',
}
# Modules to include by default (all)
@@ -1864,6 +1931,7 @@ PLATFORM_JS_PRE = '''
if (x._hash_table) return "hash-table";
if (x._sxset) return "set";
if (x._regexp) return "regexp";
if (x._bytevector) return "bytevector";
if (x._rational) return "rational";
if (typeof Node !== "undefined" && x instanceof Node) return "dom-node";
if (Array.isArray(x)) return "list";

View File

@@ -2891,4 +2891,106 @@ let () =
let r = !Sx_types._cek_call_ref fn (List [v]) in
Hashtbl.replace out (set_key r) r) ht;
SxSet out
| _ -> raise (Eval_error "set-map: expected set fn"))
| _ -> raise (Eval_error "set-map: expected set fn"));
(* === Bytevectors === *)
register "make-bytevector" (fun args ->
match args with
| [Integer n] -> SxBytevector (Bytes.make n '\000')
| [Integer n; Integer fill] ->
if fill < 0 || fill > 255 then raise (Eval_error "make-bytevector: fill must be 0-255");
SxBytevector (Bytes.make n (Char.chr fill))
| _ -> raise (Eval_error "make-bytevector: expected n [fill]"));
register "bytevector?" (fun args ->
match args with
| [SxBytevector _] -> Bool true
| [_] -> Bool false
| _ -> raise (Eval_error "bytevector?: 1 arg"));
register "bytevector-length" (fun args ->
match args with
| [SxBytevector b] -> Integer (Bytes.length b)
| _ -> raise (Eval_error "bytevector-length: expected bytevector"));
register "bytevector-u8-ref" (fun args ->
match args with
| [SxBytevector b; Integer i] ->
if i < 0 || i >= Bytes.length b then
raise (Eval_error (Printf.sprintf "bytevector-u8-ref: index %d out of range" i));
Integer (Char.code (Bytes.get b i))
| _ -> raise (Eval_error "bytevector-u8-ref: expected bytevector index"));
register "bytevector-u8-set!" (fun args ->
match args with
| [SxBytevector b; Integer i; Integer byte] ->
if i < 0 || i >= Bytes.length b then
raise (Eval_error (Printf.sprintf "bytevector-u8-set!: index %d out of range" i));
if byte < 0 || byte > 255 then
raise (Eval_error "bytevector-u8-set!: byte must be 0-255");
Bytes.set b i (Char.chr byte); Nil
| _ -> raise (Eval_error "bytevector-u8-set!: expected bytevector index byte"));
register "bytevector-copy" (fun args ->
match args with
| [SxBytevector b] -> SxBytevector (Bytes.copy b)
| [SxBytevector b; Integer start] ->
let len = Bytes.length b - start in
SxBytevector (Bytes.sub b start len)
| [SxBytevector b; Integer start; Integer stop] ->
SxBytevector (Bytes.sub b start (stop - start))
| _ -> raise (Eval_error "bytevector-copy: expected bytevector [start [end]]"));
register "bytevector-copy!" (fun args ->
let do_copy dst at src start stop =
let len = stop - start in
Bytes.blit src start dst at len; Nil
in
match args with
| [SxBytevector dst; Integer at; SxBytevector src] ->
do_copy dst at src 0 (Bytes.length src)
| [SxBytevector dst; Integer at; SxBytevector src; Integer start] ->
do_copy dst at src start (Bytes.length src)
| [SxBytevector dst; Integer at; SxBytevector src; Integer start; Integer stop] ->
do_copy dst at src start stop
| _ -> raise (Eval_error "bytevector-copy!: expected dst at src [start [end]]"));
register "bytevector-append" (fun args ->
let bufs = List.map (function
| SxBytevector b -> b
| _ -> raise (Eval_error "bytevector-append: expected bytevectors")) args in
let total = List.fold_left (fun acc b -> acc + Bytes.length b) 0 bufs in
let result = Bytes.create total in
let pos = ref 0 in
List.iter (fun b ->
let len = Bytes.length b in
Bytes.blit b 0 result !pos len;
pos := !pos + len) bufs;
SxBytevector result);
register "utf8->string" (fun args ->
match args with
| [SxBytevector b] -> String (Bytes.to_string b)
| [SxBytevector b; Integer start] ->
String (Bytes.sub_string b start (Bytes.length b - start))
| [SxBytevector b; Integer start; Integer stop] ->
String (Bytes.sub_string b start (stop - start))
| _ -> raise (Eval_error "utf8->string: expected bytevector [start [end]]"));
register "string->utf8" (fun args ->
match args with
| [String s] -> SxBytevector (Bytes.of_string s)
| [String s; Integer start] ->
let len = String.length s - start in
SxBytevector (Bytes.of_string (String.sub s start len))
| [String s; Integer start; Integer stop] ->
SxBytevector (Bytes.of_string (String.sub s start (stop - start)))
| _ -> raise (Eval_error "string->utf8: expected string [start [end]]"));
register "bytevector->list" (fun args ->
match args with
| [SxBytevector b] ->
let items = List.init (Bytes.length b) (fun i -> Integer (Char.code (Bytes.get b i))) in
List items
| _ -> raise (Eval_error "bytevector->list: expected bytevector"));
register "list->bytevector" (fun args ->
match args with
| [List items] | [ListRef { contents = items }] ->
let bytes_list = List.map (function
| Integer n when n >= 0 && n <= 255 -> Char.chr n
| Integer n -> raise (Eval_error (Printf.sprintf "list->bytevector: byte %d out of range" n))
| v -> raise (Eval_error ("list->bytevector: expected integer, got " ^ Sx_types.type_of v))) items in
let b = Bytes.create (List.length bytes_list) in
List.iteri (fun i c -> Bytes.set b i c) bytes_list;
SxBytevector b
| [Nil] -> SxBytevector (Bytes.create 0)
| _ -> raise (Eval_error "list->bytevector: expected list"))

View File

@@ -81,6 +81,7 @@ and value =
| Rational of int * int (** Exact rational: numerator, denominator (reduced, denom>0). *)
| SxSet of (string, value) Hashtbl.t (** Mutable set keyed by inspect(value). *)
| SxRegexp of string * string * Re.re (** Regexp: source, flags, compiled. *)
| SxBytevector of bytes (** Mutable bytevector — R7RS bytevector type. *)
(** String input port: source string + mutable cursor position. *)
and sx_port_kind =
@@ -516,8 +517,9 @@ let type_of = function
| Port { sp_kind = PortInput _; _ } -> "input-port"
| Port { sp_kind = PortOutput _; _ } -> "output-port"
| Rational _ -> "rational"
| SxSet _ -> "set"
| SxRegexp _ -> "regexp"
| SxSet _ -> "set"
| SxRegexp _ -> "regexp"
| SxBytevector _ -> "bytevector"
let is_nil = function Nil -> true | _ -> false
let is_lambda = function Lambda _ -> true | _ -> false
@@ -882,3 +884,4 @@ let rec inspect = function
| Rational (n, d) -> Printf.sprintf "%d/%d" n d
| SxSet ht -> Printf.sprintf "<set:%d>" (Hashtbl.length ht)
| SxRegexp (src, flags, _) -> Printf.sprintf "#/%s/%s" src flags
| SxBytevector b -> Printf.sprintf "#u8(%s)" (String.concat " " (List.init (Bytes.length b) (fun i -> string_of_int (Char.code (Bytes.get b i)))))

View File

@@ -1252,3 +1252,77 @@
:params ((re :as regexp) (str :as string))
:returns "list"
:doc "Split str on every match of re; returns list of strings.")
(define-module :stdlib.bytevectors)
(define-primitive
"make-bytevector"
:params (n &rest fill)
:returns "bytevector"
:doc "Create a bytevector of n bytes, all initialised to fill (default 0).")
(define-primitive
"bytevector?"
:params (v)
:returns "boolean"
:doc "True if v is a bytevector.")
(define-primitive
"bytevector-length"
:params ((bv :as bytevector))
:returns "number"
:doc "Number of bytes in bv.")
(define-primitive
"bytevector-u8-ref"
:params ((bv :as bytevector) (i :as number))
:returns "number"
:doc "Byte value 0-255 at index i.")
(define-primitive
"bytevector-u8-set!"
:params ((bv :as bytevector) (i :as number) (byte :as number))
:returns "nil"
:doc "Set byte at index i to byte 0-255. Mutates bv.")
(define-primitive
"bytevector-copy"
:params ((bv :as bytevector) &rest bounds)
:returns "bytevector"
:doc "Fresh copy of bv, optionally sliced to [start, end).")
(define-primitive
"bytevector-copy!"
:params ((dst :as bytevector) (at :as number) (src :as bytevector) &rest bounds)
:returns "nil"
:doc "Copy bytes from src[start..end) into dst starting at at. Mutates dst.")
(define-primitive
"bytevector-append"
:params (&rest bvs)
:returns "bytevector"
:doc "Concatenate bytevectors into a new bytevector.")
(define-primitive
"utf8->string"
:params ((bv :as bytevector) &rest bounds)
:returns "string"
:doc "Decode bv[start..end) as UTF-8 and return the string.")
(define-primitive
"string->utf8"
:params ((s :as string) &rest bounds)
:returns "bytevector"
:doc "Encode s[start..end) as UTF-8 and return a bytevector.")
(define-primitive
"bytevector->list"
:params ((bv :as bytevector))
:returns "list"
:doc "Convert bytevector to a list of byte integers.")
(define-primitive
"list->bytevector"
:params ((lst :as list))
:returns "bytevector"
:doc "Build a bytevector from a list of byte integers 0-255.")

View File

@@ -0,0 +1,236 @@
;; ==========================================================================
;; test-bytevectors.sx — Tests for bytevector primitives
;; ==========================================================================
;; --------------------------------------------------------------------------
;; make-bytevector / bytevector?
;; --------------------------------------------------------------------------
(defsuite
"bv:create"
(deftest
"make-bytevector returns bytevector"
(assert (bytevector? (make-bytevector 4))))
(deftest
"make-bytevector zeroes by default"
(let
((bv (make-bytevector 3)))
(assert
(and
(= (bytevector-u8-ref bv 0) 0)
(= (bytevector-u8-ref bv 1) 0)
(= (bytevector-u8-ref bv 2) 0)))))
(deftest
"make-bytevector with fill"
(let
((bv (make-bytevector 3 42)))
(assert
(and
(= (bytevector-u8-ref bv 0) 42)
(= (bytevector-u8-ref bv 1) 42)
(= (bytevector-u8-ref bv 2) 42)))))
(deftest
"make-bytevector length 0"
(assert= (bytevector-length (make-bytevector 0)) 0))
(deftest
"bytevector? true for bytevector"
(assert (bytevector? (make-bytevector 2))))
(deftest
"bytevector? false for string"
(assert (not (bytevector? "hello"))))
(deftest "bytevector? false for nil" (assert (not (bytevector? nil))))
(deftest
"bytevector? false for list"
(assert (not (bytevector? (list 1 2 3))))))
;; --------------------------------------------------------------------------
;; bytevector-length / u8-ref / u8-set!
;; --------------------------------------------------------------------------
(defsuite
"bv:access"
(deftest
"bytevector-length"
(assert= (bytevector-length (make-bytevector 5)) 5))
(deftest
"u8-ref reads fill byte"
(assert=
(bytevector-u8-ref (make-bytevector 4 99) 2)
99))
(deftest
"u8-set! mutates"
(let
((bv (make-bytevector 3 0)))
(bytevector-u8-set! bv 1 200)
(assert= (bytevector-u8-ref bv 1) 200)))
(deftest
"u8-set! boundary byte 255"
(let
((bv (make-bytevector 1 0)))
(bytevector-u8-set! bv 0 255)
(assert= (bytevector-u8-ref bv 0) 255)))
(deftest
"u8-set! byte 0"
(let
((bv (make-bytevector 1 255)))
(bytevector-u8-set! bv 0 0)
(assert= (bytevector-u8-ref bv 0) 0))))
;; --------------------------------------------------------------------------
;; bytevector-copy
;; --------------------------------------------------------------------------
(defsuite
"bv:copy"
(deftest
"copy produces equal content"
(let
((bv (make-bytevector 3 7)))
(let
((bv2 (bytevector-copy bv)))
(assert
(and
(= (bytevector-u8-ref bv2 0) 7)
(= (bytevector-u8-ref bv2 1) 7)
(= (bytevector-u8-ref bv2 2) 7))))))
(deftest
"copy is independent"
(let
((bv (make-bytevector 2 0)))
(let
((bv2 (bytevector-copy bv)))
(bytevector-u8-set! bv2 0 99)
(assert= (bytevector-u8-ref bv 0) 0))))
(deftest
"copy with start"
(let
((bv (list->bytevector (list 10 20 30 40))))
(let
((bv2 (bytevector-copy bv 2)))
(assert
(and
(= (bytevector-length bv2) 2)
(= (bytevector-u8-ref bv2 0) 30))))))
(deftest
"copy with start and end"
(let
((bv (list->bytevector (list 10 20 30 40))))
(let
((bv2 (bytevector-copy bv 1 3)))
(assert
(and
(= (bytevector-length bv2) 2)
(= (bytevector-u8-ref bv2 0) 20)
(= (bytevector-u8-ref bv2 1) 30)))))))
;; --------------------------------------------------------------------------
;; bytevector-copy!
;; --------------------------------------------------------------------------
(defsuite
"bv:copy-bang"
(deftest
"copy! overwrites dst region"
(let
((dst (make-bytevector 4 0)))
(let
((src (list->bytevector (list 1 2 3))))
(bytevector-copy! dst 1 src)
(assert
(and
(= (bytevector-u8-ref dst 0) 0)
(= (bytevector-u8-ref dst 1) 1)
(= (bytevector-u8-ref dst 2) 2)
(= (bytevector-u8-ref dst 3) 3))))))
(deftest
"copy! with src bounds"
(let
((dst (make-bytevector 2 0)))
(let
((src (list->bytevector (list 10 20 30 40))))
(bytevector-copy! dst 0 src 1 3)
(assert
(and
(= (bytevector-u8-ref dst 0) 20)
(= (bytevector-u8-ref dst 1) 30)))))))
;; --------------------------------------------------------------------------
;; bytevector-append
;; --------------------------------------------------------------------------
(defsuite
"bv:append"
(deftest
"append two bytevectors"
(let
((bv (bytevector-append (list->bytevector (list 1 2)) (list->bytevector (list 3 4)))))
(assert
(and
(= (bytevector-length bv) 4)
(= (bytevector-u8-ref bv 0) 1)
(= (bytevector-u8-ref bv 3) 4)))))
(deftest
"append three bytevectors"
(let
((bv (bytevector-append (list->bytevector (list 1)) (list->bytevector (list 2)) (list->bytevector (list 3)))))
(assert= (bytevector-length bv) 3)))
(deftest
"append empty"
(assert=
(bytevector-length
(bytevector-append
(make-bytevector 0)
(make-bytevector 0)))
0)))
;; --------------------------------------------------------------------------
;; list->bytevector / bytevector->list
;; --------------------------------------------------------------------------
(defsuite
"bv:conversion"
(deftest
"list->bytevector roundtrip"
(let
((lst (list 10 20 30)))
(assert= (bytevector->list (list->bytevector lst)) lst)))
(deftest
"list->bytevector empty"
(assert= (bytevector-length (list->bytevector nil)) 0))
(deftest
"bytevector->list from make-bytevector"
(let
((lst (bytevector->list (make-bytevector 3 5))))
(assert= lst (list 5 5 5)))))
;; --------------------------------------------------------------------------
;; utf8 roundtrip
;; --------------------------------------------------------------------------
(defsuite
"bv:utf8"
(deftest
"string->utf8 returns bytevector"
(assert (bytevector? (string->utf8 "hello"))))
(deftest
"string->utf8 length"
(assert= (bytevector-length (string->utf8 "abc")) 3))
(deftest
"utf8->string roundtrip"
(assert= (utf8->string (string->utf8 "hello")) "hello"))
(deftest
"utf8->string with slice"
(let
((bv (string->utf8 "hello")))
(assert= (utf8->string bv 1 4) "ell")))
(deftest
"string->utf8 with start"
(assert= (utf8->string (string->utf8 "hello" 2)) "llo"))
(deftest
"string->utf8 with start and end"
(assert=
(utf8->string (string->utf8 "hello" 1 4))
"ell"))
(deftest
"empty string round-trips"
(assert= (utf8->string (string->utf8 "")) "")))