Add .sxbc s-expression bytecode format
Bytecode modules are now serialized as s-expressions (.sxbc) in addition to JSON (.sxbc.json). The .sxbc format is the canonical representation: it is content-addressable, parseable by the SX parser, and suitable for CID referencing. Annotation layers (source maps, variable names, tests, docs) can reference the bytecode CID without polluting the bytecode itself.

Format: (sxbc version hash (code :arity N :upvalue-count N :bytecode (...) :constants (...)))

The browser loader tries .sxbc first (via the load-sxbc kernel primitive) and falls back to .sxbc.json. Caddy still needs a MIME type configured for .sxbc before it can serve the new format; requests for .sxbc currently return 404, and the JSON fallback works.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* compile-modules.js — Pre-compile .sx files to bytecode JSON.
|
||||
* compile-modules.js — Pre-compile .sx files to bytecode s-expressions.
|
||||
*
|
||||
* Uses the js_of_ocaml kernel in Node.js to compile each .sx module,
|
||||
* then serializes the bytecode as JSON for fast browser loading.
|
||||
* then serializes the bytecode as .sxbc (s-expression format) for browser loading.
|
||||
*
|
||||
* Usage: node compile-modules.js [dist-dir]
|
||||
*/
|
||||
@@ -59,7 +59,6 @@ for (const file of FILES) {
|
||||
const hash = crypto.createHash('sha256').update(src).digest('hex').slice(0, 16);
|
||||
|
||||
try {
|
||||
// Parse source to get expression list, then compile
|
||||
const code = K.eval('(compile-module (sx-parse ' + JSON.stringify(src) + '))');
|
||||
|
||||
if (typeof code === 'string' && code.startsWith('Error')) {
|
||||
@@ -68,15 +67,22 @@ for (const file of FILES) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const sx = serializeModuleToSx(code, hash);
|
||||
|
||||
// Write .sxbc (s-expression format)
|
||||
const outPath = srcPath.replace(/\.sx$/, '.sxbc');
|
||||
fs.writeFileSync(outPath, sx);
|
||||
|
||||
// Also write .sxbc.json for backwards compatibility during transition
|
||||
const json = {
|
||||
magic: 'SXBC',
|
||||
version: 1,
|
||||
hash: hash,
|
||||
module: serializeModule(code),
|
||||
module: serializeModuleToJson(code),
|
||||
};
|
||||
const jsonPath = srcPath.replace(/\.sx$/, '.sxbc.json');
|
||||
fs.writeFileSync(jsonPath, JSON.stringify(json));
|
||||
|
||||
const outPath = srcPath.replace(/\.sx$/, '.sxbc.json');
|
||||
fs.writeFileSync(outPath, JSON.stringify(json));
|
||||
const size = fs.statSync(outPath).size;
|
||||
console.log(' ok', file, '→', Math.round(size / 1024) + 'K');
|
||||
compiled++;
|
||||
@@ -88,13 +94,93 @@ for (const file of FILES) {
|
||||
|
||||
console.log('Done:', compiled, 'compiled,', skipped, 'skipped');
|
||||
|
||||
// --- Serialization ---
|
||||
// --- S-expression serialization ---
|
||||
|
||||
function serializeModule(code) {
|
||||
return {
|
||||
/**
 * Wrap a compiled module in the top-level `.sxbc` envelope.
 *
 * The result has the shape `(sxbc 1 "<hash>" (code ...))` followed by a
 * newline, where `1` is the format version written by this script.
 *
 * @param {object} code - Compiled code object; passed straight to serializeCodeToSx.
 * @param {string} hash - Source hash; inserted verbatim between double quotes (no escaping).
 * @returns {string} The complete `.sxbc` file contents.
 */
function serializeModuleToSx(code, hash) {
  const body = serializeCodeToSx(code, 2);
  return `(sxbc 1 "${hash}"\n ${body})\n`;
}
|
||||
|
||||
/**
 * Render one code object as a `(code ...)` s-expression.
 *
 * `:arity` and `:upvalue-count` are only written when they are truthy
 * (i.e. non-zero); `:bytecode` and `:constants` are always written.
 * Nested constants are rendered with serializeConstToSx at `indent + 4`.
 *
 * @param {object} code - Object exposing `bytecode`, `constants`, and optionally
 *   `arity` and `upvalue-count`.
 * @param {number} indent - Number of padding characters placed before continuation lines.
 * @returns {string} The `(code ...)` form, without a trailing newline.
 */
function serializeCodeToSx(code, indent) {
  const margin = ' '.repeat(indent);
  const opcodes = extractList(code.bytecode);
  const constants = extractList(code.constants);
  const arity = code.arity || 0;
  const upvalueCount = code['upvalue-count'] || 0;

  let out = '(code';
  if (arity) out += ' :arity ' + arity;
  if (upvalueCount) out += ' :upvalue-count ' + upvalueCount;
  out += '\n' + margin + ' :bytecode (' + opcodes.join(' ') + ')';
  out += '\n' + margin + ' :constants (';

  const rendered = constants.map((c) => serializeConstToSx(c, indent + 4));
  if (rendered.length > 0) {
    // One constant per line, each prefixed with the current margin.
    out += '\n' + rendered.map((line) => margin + ' ' + line).join('\n');
  }

  // First paren closes :constants, second closes the (code ...) form.
  return out + '))';
}
|
||||
|
||||
/**
 * Render a single constant-pool entry as SX source text.
 *
 * Mapping:
 *   null / undefined -> nil
 *   number           -> String(n)
 *   string           -> double-quoted, with `\` and `"` backslash-escaped
 *   boolean          -> true / false
 *   symbol           -> 'name
 *   keyword          -> :name
 *   list             -> (list item ...)
 *   code object      -> (code ...) via serializeCodeToSx
 *   dict             -> {:key value ...}
 *   anything else    -> nil
 *
 * NOTE(review): symbols are written with a leading quote. If the SX reader
 * expands `'name` to `(quote name)`, a loader that passes constants through
 * unchanged would see a list rather than a symbol — confirm against the parser.
 *
 * @param {*} c - The constant to render.
 * @param {number} indent - Indentation forwarded to nested code objects.
 * @returns {string} SX text for the constant.
 */
function serializeConstToSx(c, indent) {
  if (c === null || c === undefined) return 'nil';

  switch (typeof c) {
    case 'number':
      return String(c);
    case 'string': {
      // Backslashes first, so the ones added for quotes are not doubled.
      const escaped = c.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
      return '"' + escaped + '"';
    }
    case 'boolean':
      return c ? 'true' : 'false';
    default:
      break;
  }

  const tag = c._type;
  if (tag === 'symbol') return "'" + c.name;
  if (tag === 'keyword') return ':' + c.name;
  if (tag === 'list') {
    const rendered = extractList(c).map((item) => serializeConstToSx(item, indent));
    return '(list ' + rendered.join(' ') + ')';
  }

  // Code object (nested lambda bytecode) exposed as a plain property.
  if (c.bytecode) return serializeCodeToSx(c, indent);

  if (tag === 'dict') {
    // A dict may itself be a code object; look the field up through `get` when available.
    const nestedBytecode = c.get ? c.get('bytecode') : c.bytecode;
    if (nestedBytecode) return serializeCodeToSx(c, indent);

    // Regular dict — serialize as {:key val ...}
    const pairs = [];
    if (c.forEach) {
      c.forEach((value, key) => {
        pairs.push(':' + key + ' ' + serializeConstToSx(value, indent));
      });
    }
    return '{' + pairs.join(' ') + '}';
  }

  return 'nil';
}
|
||||
|
||||
// --- JSON serialization (backwards compat) ---
|
||||
|
||||
/**
 * Build the JSON-compatible representation of a code object (legacy
 * `.sxbc.json` format, kept for backwards compatibility).
 *
 * `bytecode` and `constants` are always present. `arity`, `upvalue-count`
 * and `locals` are copied only when truthy; a truthy non-number value is
 * written as 0.
 *
 * @param {object} code - Object exposing `bytecode`, `constants`, and optionally
 *   `arity`, `upvalue-count`, `locals`.
 * @returns {object} Plain object suitable for JSON.stringify.
 */
function serializeModuleToJson(code) {
  const serialized = {
    bytecode: extractList(code.bytecode),
    constants: extractList(code.constants).map((c) => serializeConstantJson(c)),
  };

  // Same order as the keys were historically appended: arity, upvalue-count, locals.
  for (const field of ['arity', 'upvalue-count', 'locals']) {
    const value = code[field];
    if (value) {
      serialized[field] = typeof value === 'number' ? value : 0;
    }
  }

  return serialized;
}
|
||||
|
||||
/**
 * Convert one constant-pool entry to its tagged JSON form `{ t, v }`.
 *
 * Tags: `nil`, `n` (number), `s` (string), `b` (boolean), `sym`, `kw`,
 * `list`, `code`, `dict`. Values that match none of the known shapes are
 * written as `{ t: 'nil' }`.
 *
 * @param {*} c - The constant to convert.
 * @returns {object} Tagged JSON-compatible object.
 */
function serializeConstantJson(c) {
  if (c === null || c === undefined) return { t: 'nil' };

  switch (typeof c) {
    case 'number':
      return { t: 'n', v: c };
    case 'string':
      return { t: 's', v: c };
    case 'boolean':
      return { t: 'b', v: c };
    default:
      break;
  }

  const tag = c._type;
  if (tag === 'symbol') return { t: 'sym', v: c.name };
  if (tag === 'keyword') return { t: 'kw', v: c.name };
  if (tag === 'list') {
    return { t: 'list', v: extractList(c).map((item) => serializeConstantJson(item)) };
  }

  // Code object exposed as a plain property.
  if (c.bytecode) return { t: 'code', v: serializeModuleToJson(c) };

  if (tag === 'dict') {
    // A dict may itself hold a code object; use `get` when the dict provides it.
    const nestedBytecode = c.get ? c.get('bytecode') : c.bytecode;
    if (nestedBytecode) return { t: 'code', v: serializeModuleToJson(c) };

    const entries = {};
    if (c.forEach) {
      c.forEach((value, key) => {
        entries[key] = serializeConstantJson(value);
      });
    }
    return { t: 'dict', v: entries };
  }

  return { t: 'nil' };
}
|
||||
|
||||
function extractList(v) {
|
||||
@@ -104,25 +190,3 @@ function extractList(v) {
|
||||
if (v.items) return v.items;
|
||||
return [];
|
||||
}
|
||||
|
||||
/**
 * Convert one constant-pool entry to its tagged JSON form `{ t, v }`.
 *
 * Tags: `nil`, `n` (number), `s` (string), `b` (boolean), `sym`, `kw`,
 * `list`, `code`, `dict`. Nested code objects are serialized with
 * serializeModule. Unrecognised values become `{ t: 'nil' }`.
 *
 * @param {*} c - The constant to convert.
 * @returns {object} Tagged JSON-compatible object.
 */
function serializeConstant(c) {
  if (c === null || c === undefined) return { t: 'nil' };

  switch (typeof c) {
    case 'number':
      return { t: 'n', v: c };
    case 'string':
      return { t: 's', v: c };
    case 'boolean':
      return { t: 'b', v: c };
    default:
      break;
  }

  const tag = c._type;
  if (tag === 'symbol') return { t: 'sym', v: c.name };
  if (tag === 'keyword') return { t: 'kw', v: c.name };
  if (tag === 'list') {
    return { t: 'list', v: extractList(c).map((item) => serializeConstant(item)) };
  }

  // Code object (nested lambda bytecode)
  if (c.bytecode) return { t: 'code', v: serializeModule(c) };

  if (tag === 'dict') {
    // Check if it's a code object stored as dict
    const nestedBytecode = c.get ? c.get('bytecode') : c.bytecode;
    if (nestedBytecode) return { t: 'code', v: serializeModule(c) };

    // Regular dict
    const entries = {};
    if (c.forEach) {
      c.forEach((value, key) => {
        entries[key] = serializeConstant(value);
      });
    }
    return { t: 'dict', v: entries };
  }

  return { t: 'nil' };
}
|
||||
|
||||
@@ -503,6 +503,41 @@ let () =
|
||||
let d = Hashtbl.create 2 in Hashtbl.replace d "ok" (Bool false); Hashtbl.replace d "error" (String msg); Dict d)
|
||||
| _ -> raise (Eval_error "try-call: 1 arg"));
|
||||
|
||||
(* --- Bytecode loading from s-expression format ---
   Takes one argument, the already-parsed module form:
     (sxbc version hash (code :arity N :upvalue-count N :bytecode (...) :constants (...)))
   The (code ...) form is read from the fourth position when the list has at
   least four elements, otherwise from the third. It is converted recursively
   into a Dict, passed to Sx_vm.code_from_value, and executed against
   _vm_globals. The result is the number of entries in _vm_globals. *)
bind "load-sxbc" (fun args ->
  (* Convert a (code ...) form into a Dict with the keys "arity",
     "upvalue-count", "bytecode" and "constants" (each only when present).
     Any other form raises Eval_error. *)
  let rec dict_of_code form =
    match form with
    | List (Symbol "code" :: fields) ->
      let tbl = Hashtbl.create 8 in
      let rec scan remaining =
        match remaining with
        | [] -> ()
        | Keyword "arity" :: Number n :: tl ->
          Hashtbl.replace tbl "arity" (Number n);
          scan tl
        | Keyword "upvalue-count" :: Number n :: tl ->
          Hashtbl.replace tbl "upvalue-count" (Number n);
          scan tl
        | Keyword "bytecode" :: List nums :: tl ->
          Hashtbl.replace tbl "bytecode" (List nums);
          scan tl
        | Keyword "constants" :: List consts :: tl ->
          Hashtbl.replace tbl "constants" (List (List.map value_of_const consts));
          scan tl
        | _ :: tl ->
          (* skip unknown keywords: unmatched elements are dropped one at a time *)
          scan tl
      in
      scan fields;
      Dict tbl
    | _ -> raise (Eval_error ("load-sxbc: expected (code ...), got " ^ type_of form))
  (* Convert one entry of a :constants list. Nested (code ...) forms become
     dicts, (list ...) forms become plain lists with converted items, and
     every other value is returned unchanged. *)
  and value_of_const v =
    match v with
    | List (Symbol "code" :: _) -> dict_of_code v
    | List (Symbol "list" :: items) -> List (List.map value_of_const items)
    | _ -> v (* strings, numbers, booleans, nil, symbols, keywords pass through *)
  in
  match args with
  | [List (_ :: _ :: _ :: code_form :: _)] | [List (_ :: _ :: code_form :: _)] ->
    let code = Sx_vm.code_from_value (dict_of_code code_form) in
    let _ = Sx_vm.execute_module code _vm_globals in
    sync_vm_to_env ();
    Number (float_of_int (Hashtbl.length _vm_globals))
  | _ -> raise (Eval_error "load-sxbc: expected (sxbc version hash (code ...))"));
|
||||
|
||||
(* --- List mutation --- *)
|
||||
bind "append!" (fun args ->
|
||||
match args with
|
||||
|
||||
Reference in New Issue
Block a user