Native bytecode compiler: 8x faster, compile-blob command

Rewrite compile-modules.js to use the native OCaml sx_server binary
instead of the js_of_ocaml kernel in Node.js. Compiles 23 modules in
23s (was 3+ minutes). Uses batch epoch protocol with latin1 encoding
to preserve byte positions for multi-byte UTF-8 content.

- Add compile-blob server command: parse source natively, compile via
  SX compile-module, return bytecode dict
- Fix orchestration.sxbc.json and boot.sxbc.json — never compiled
  successfully with the old JS kernel, now work with native compiler
- Auto-copy compiled bytecode to shared/static/wasm/sx/ for serving

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-30 09:49:28 +00:00
parent 7a4a6c8a85
commit 1985c648eb
6 changed files with 188 additions and 147 deletions

View File

@@ -829,6 +829,29 @@ let rec dispatch env cmd =
let src = read_blob () in
dispatch env (List [Symbol "eval"; String src])
| List [Symbol "compile-blob"] ->
(* Read source as blob, parse natively in OCaml, compile via SX compile-module.
Returns the bytecode dict as SX text. Much faster than JS kernel. *)
let src = read_blob () in
(try
(* Parse the blob into top-level expressions, then hand the whole list to
   the SX-level "compile-module" function looked up from the environment. *)
let exprs = Sx_parser.parse_all src in
let compile_module = env_get env "compile-module" in
let result = Sx_ref.cek_call compile_module (List [List exprs]) in
(* Minimal one-off serializer for the compiled result. NOTE(review):
   Hashtbl.fold visits dict keys in unspecified order, and integral
   floats are printed via int_of_float (large values would truncate) —
   presumably acceptable since the client re-parses this text; confirm. *)
let rec raw_serialize = function
| Nil -> "nil"
| Bool true -> "true" | Bool false -> "false"
| Number n -> if Float.is_integer n then string_of_int (int_of_float n) else Printf.sprintf "%g" n
| String s -> "\"" ^ escape_sx_string s ^ "\""
| Symbol s -> s | Keyword k -> ":" ^ k
| List items | ListRef { contents = items } -> "(" ^ String.concat " " (List.map raw_serialize items) ^ ")"
| Dict d -> let pairs = Hashtbl.fold (fun k v acc -> (Printf.sprintf ":%s %s" k (raw_serialize v)) :: acc) d [] in "{" ^ String.concat " " pairs ^ "}"
| SxExpr s -> s | _ -> "nil"
in
send_ok_raw (raw_serialize result)
with
(* Report both SX-level eval errors and unexpected OCaml exceptions to the
   client instead of crashing the server loop. *)
| Eval_error msg -> send_error msg
| exn -> send_error (Printexc.to_string exn))
| List [Symbol "eval"; String src] ->
(try
let exprs = Sx_parser.parse_all src in

View File

@@ -2,8 +2,9 @@
/**
* compile-modules.js — Pre-compile .sx files to bytecode s-expressions.
*
* Uses the js_of_ocaml kernel in Node.js to compile each .sx module,
* then serializes the bytecode as .sxbc (s-expression format) for browser loading.
* Uses the native OCaml sx_server binary for compilation (~5x faster than
* the js_of_ocaml kernel). Sends source via the blob protocol, receives
* compiled bytecode as SX text.
*
* Usage: node compile-modules.js [dist-dir]
*/
@@ -11,6 +12,7 @@
const fs = require('fs');
const path = require('path');
const crypto = require('crypto');
const { execSync, spawnSync } = require('child_process');
const distDir = process.argv[2] || path.join(__dirname, 'dist');
const sxDir = path.join(distDir, 'sx');
@@ -20,15 +22,16 @@ if (!fs.existsSync(sxDir)) {
process.exit(1);
}
// Load the js_of_ocaml kernel
const kernelPath = path.join(__dirname, '..', '_build', 'default', 'browser', 'sx_browser.bc.js');
if (!fs.existsSync(kernelPath)) {
console.error('Kernel not found:', kernelPath);
// Find the native OCaml binary
const binPaths = [
path.join(__dirname, '..', '_build', 'default', 'bin', 'sx_server.exe'),
'/app/bin/sx_server',
];
const binPath = binPaths.find(p => fs.existsSync(p));
if (!binPath) {
console.error('sx_server binary not found at:', binPaths.join(', '));
process.exit(1);
}
require(kernelPath);
const K = globalThis.SxKernel;
if (!K) { console.error('SxKernel not initialized'); process.exit(1); }
const FILES = [
'render.sx', 'core-signals.sx', 'signals.sx', 'deps.sx', 'router.sx',
@@ -38,155 +41,162 @@ const FILES = [
'harness-web.sx', 'engine.sx', 'orchestration.sx', 'boot.sx',
];
// Load all files to build up the env (need compiler loaded)
console.log('Loading SX environment...');
for (const file of FILES) {
const r = K.load(fs.readFileSync(path.join(sxDir, file), 'utf8'));
if (typeof r === 'string' && r.startsWith('Error')) {
console.error(' FAIL', file, r);
process.exit(1);
}
}
console.log(' ' + FILES.length + ' files loaded');
// ---------------------------------------------------------------------------
// Build the full input script — all commands in one batch
// ---------------------------------------------------------------------------
// Compile each file to bytecode
console.log('Compiling bytecode modules...');
const t0 = Date.now();
console.log('Building compilation script...');
let epoch = 1;
let script = '';
// Load compiler
script += `(epoch ${epoch++})\n(load "lib/compiler.sx")\n`;
// JIT pre-compile the compiler
script += `(epoch ${epoch++})\n(vm-compile-adapter)\n`;
// Load all modules into env
for (const file of FILES) {
const src = fs.readFileSync(path.join(sxDir, file), 'utf8');
const buf = Buffer.from(src, 'utf8');
script += `(epoch ${epoch++})\n(eval-blob)\n(blob ${buf.length})\n`;
script += src + '\n';
}
// Compile each module
const compileEpochs = {};
for (const file of FILES) {
const src = fs.readFileSync(path.join(sxDir, file), 'utf8');
const buf = Buffer.from(src, 'utf8');
const ep = epoch++;
compileEpochs[ep] = file;
script += `(epoch ${ep})\n(compile-blob)\n(blob ${buf.length})\n`;
script += src + '\n';
}
// Write script to temp file and pipe to server
const tmpFile = '/tmp/sx-compile-script.txt';
fs.writeFileSync(tmpFile, script);
console.log('Running native OCaml compiler (' + FILES.length + ' files)...');
const t1 = Date.now();
const result = spawnSync(binPath, [], {
input: fs.readFileSync(tmpFile),
maxBuffer: 100 * 1024 * 1024, // 100MB
timeout: 300000, // 5 min
stdio: ['pipe', 'pipe', 'pipe'],
});
if (result.error) {
console.error('Server error:', result.error);
process.exit(1);
}
const stderr = result.stderr.toString();
process.stderr.write(stderr);
// Use latin1 to preserve byte positions (UTF-8 multi-byte chars stay as-is in length)
const stdoutBuf = result.stdout;
const stdout = stdoutBuf.toString('latin1');
const dt = Date.now() - t1;
console.log('Server finished in ' + Math.round(dt / 1000) + 's');
// ---------------------------------------------------------------------------
// Parse responses — extract compiled bytecode for each file
// ---------------------------------------------------------------------------
// Parse responses — stdout is latin1 so byte positions match string positions
let compiled = 0, skipped = 0;
let pos = 0;
for (const file of FILES) {
const srcPath = path.join(sxDir, file);
const src = fs.readFileSync(srcPath, 'utf8');
const hash = crypto.createHash('sha256').update(src).digest('hex').slice(0, 16);
function nextLine() {
const nl = stdout.indexOf('\n', pos);
if (nl === -1) return null;
const line = stdout.slice(pos, nl);
pos = nl + 1;
return line;
}
try {
const code = K.eval('(compile-module (sx-parse ' + JSON.stringify(src) + '))');
while (pos < stdout.length) {
const line = nextLine();
if (line === null) break;
const trimmed = line.trim();
if (typeof code === 'string' && code.startsWith('Error')) {
console.error(' SKIP', file, '—', code);
skipped++;
continue;
// ok-len EPOCH LEN — read LEN bytes as value
const lenMatch = trimmed.match(/^\(ok-len (\d+) (\d+)\)$/);
if (lenMatch) {
const ep = parseInt(lenMatch[1]);
const len = parseInt(lenMatch[2]);
// Read exactly len bytes — latin1 encoding preserves byte positions
const rawValue = stdout.slice(pos, pos + len);
// Re-encode to proper UTF-8
const value = Buffer.from(rawValue, 'latin1').toString('utf8');
pos += len;
// skip trailing newline
if (pos < stdout.length && stdout.charCodeAt(pos) === 10) pos++;
const file = compileEpochs[ep];
if (file) {
if (value === 'nil' || value.startsWith('(error')) {
console.error(' SKIP', file, '—', value.slice(0, 60));
skipped++;
} else {
const hash = crypto.createHash('sha256')
.update(fs.readFileSync(path.join(sxDir, file), 'utf8'))
.digest('hex').slice(0, 16);
const sxbc = '(sxbc 1 "' + hash + '"\n (code\n ' +
value.replace(/^\{/, '').replace(/\}$/, '').trim() + '))\n';
const outPath = path.join(sxDir, file.replace(/\.sx$/, '.sxbc'));
fs.writeFileSync(outPath, sxbc);
const size = fs.statSync(outPath).size;
console.log(' ok', file, '→', Math.round(size / 1024) + 'K');
compiled++;
}
}
continue;
}
const sx = serializeModuleToSx(code, hash);
// Write .sxbc (s-expression format)
const outPath = srcPath.replace(/\.sx$/, '.sxbc');
fs.writeFileSync(outPath, sx);
// Also write .sxbc.json for backwards compatibility during transition
const json = {
magic: 'SXBC',
version: 1,
hash: hash,
module: serializeModuleToJson(code),
};
const jsonPath = srcPath.replace(/\.sx$/, '.sxbc.json');
fs.writeFileSync(jsonPath, JSON.stringify(json));
const size = fs.statSync(outPath).size;
console.log(' ok', file, '→', Math.round(size / 1024) + 'K');
compiled++;
} catch (e) {
console.error(' SKIP', file, '—', e.message || e);
skipped++;
// Simple ok or error — skip
if (trimmed.match(/^\(ok \d+/) || trimmed.match(/^\(error \d+/)) {
if (trimmed.match(/^\(error/)) {
const epMatch = trimmed.match(/^\(error (\d+)/);
if (epMatch) {
const ep = parseInt(epMatch[1]);
const file = compileEpochs[ep];
if (file) {
console.error(' SKIP', file, '—', trimmed.slice(0, 80));
skipped++;
}
}
}
continue;
}
}
console.log('Done:', compiled, 'compiled,', skipped, 'skipped');
// --- S-expression serialization ---
function serializeModuleToSx(code, hash) {
return '(sxbc 1 "' + hash + '"\n ' + serializeCodeToSx(code, 2) + ')\n';
}
function serializeCodeToSx(code, indent) {
const pad = ' '.repeat(indent);
const bc = extractList(code.bytecode);
const consts = extractList(code.constants);
const arity = code.arity || code['arity'] || 0;
const uvc = code['upvalue-count'] || 0;
let parts = ['(code'];
if (arity) parts.push(' :arity ' + arity);
if (uvc) parts.push(' :upvalue-count ' + uvc);
parts.push('\n' + pad + ' :bytecode (' + bc.join(' ') + ')');
parts.push('\n' + pad + ' :constants (');
const constStrs = consts.map(c => serializeConstToSx(c, indent + 4));
if (constStrs.length > 0) {
parts.push('\n' + constStrs.map(s => pad + ' ' + s).join('\n'));
parts.push(')');
} else {
parts[parts.length - 1] += ')';
// Copy compiled files to shared/static/wasm/sx/ for web serving
const staticSxDir = path.resolve(__dirname, '..', '..', '..', 'shared', 'static', 'wasm', 'sx');
if (fs.existsSync(staticSxDir)) {
let copied = 0;
for (const file of FILES) {
for (const ext of ['.sxbc', '.sxbc.json']) {
const src = path.join(sxDir, file.replace(/\.sx$/, ext));
const dst = path.join(staticSxDir, file.replace(/\.sx$/, ext));
if (fs.existsSync(src)) {
fs.copyFileSync(src, dst);
copied++;
}
}
}
parts.push(')');
return parts.join('');
console.log('Copied', copied, 'files to', staticSxDir);
}
/**
 * Serialize one constant-pool entry to SX (s-expression) text.
 *
 * Handles JS primitives, kernel-tagged values (symbol/keyword/list/dict),
 * and nested code objects (delegated to serializeCodeToSx). Anything
 * unrecognized serializes as `nil`.
 *
 * @param {*} c - constant to serialize
 * @param {number} indent - indentation level, forwarded to nested code
 * @returns {string} SX text for the constant
 */
function serializeConstToSx(c, indent) {
  if (c == null) return 'nil'; // covers both null and undefined

  switch (typeof c) {
    case 'number':
      return String(c);
    case 'string':
      // Escape backslashes first, then quotes, so quotes aren't double-escaped.
      return `"${c.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`;
    case 'boolean':
      return c ? 'true' : 'false';
  }

  switch (c._type) {
    case 'symbol':
      return `'${c.name}`;
    case 'keyword':
      return `:${c.name}`;
    case 'list': {
      const parts = extractList(c).map((item) => serializeConstToSx(item, indent));
      return `(list ${parts.join(' ')})`;
    }
  }

  // Code object (nested lambda bytecode).
  if (c.bytecode) return serializeCodeToSx(c, indent);

  if (c._type === 'dict') {
    // A dict wrapping bytecode is still a code object.
    const embedded = c.get ? c.get('bytecode') : c.bytecode;
    if (embedded) return serializeCodeToSx(c, indent);
    // Regular dict — serialize as {:key val ...}.
    const entries = [];
    if (c.forEach) c.forEach((v, k) => { entries.push(`:${k} ${serializeConstToSx(v, indent)}`); });
    return `{${entries.join(' ')}}`;
  }

  return 'nil';
}
const total = Date.now() - t0;
console.log('Done:', compiled, 'compiled,', skipped, 'skipped in', Math.round(total / 1000) + 's');
// --- JSON serialization (backwards compat) ---
/**
 * Serialize a compiled code object to the legacy JSON module format
 * ({ bytecode, constants, arity?, 'upvalue-count'?, locals? }) written to
 * .sxbc.json for backwards compatibility during the transition.
 *
 * Truthy arity/upvalue-count/locals are emitted; a zero value is simply
 * omitted (consumers default missing fields to 0).
 *
 * @param {object} code - compiled code object with bytecode/constants lists
 * @returns {object} plain-JSON module representation
 */
function serializeModuleToJson(code) {
  const result = {
    bytecode: extractList(code.bytecode),
    constants: extractList(code.constants).map(serializeConstantJson),
  };
  // Note: the old `code.arity || code['arity']` (and `code.locals ||
  // code['locals']`) read the exact same property twice — the redundant
  // second read is dropped; behavior is unchanged.
  const arity = code.arity;
  const uvc = code['upvalue-count'];
  const locals = code.locals;
  if (arity) result.arity = typeof arity === 'number' ? arity : 0;
  if (uvc) result['upvalue-count'] = typeof uvc === 'number' ? uvc : 0;
  if (locals) result.locals = typeof locals === 'number' ? locals : 0;
  return result;
}
/**
 * Serialize one constant-pool entry to the legacy tagged-JSON form
 * ({ t: <tag>, v: <value> }) used by the .sxbc.json output.
 *
 * @param {*} c - constant to serialize
 * @returns {object} tagged JSON value; unknown shapes become { t: 'nil' }
 */
function serializeConstantJson(c) {
  if (c == null) return { t: 'nil' }; // null and undefined alike

  const prim = typeof c;
  if (prim === 'number') return { t: 'n', v: c };
  if (prim === 'string') return { t: 's', v: c };
  if (prim === 'boolean') return { t: 'b', v: c };

  switch (c._type) {
    case 'symbol':
      return { t: 'sym', v: c.name };
    case 'keyword':
      return { t: 'kw', v: c.name };
    case 'list':
      return { t: 'list', v: extractList(c).map(serializeConstantJson) };
  }

  // Nested lambda bytecode serializes as a full module.
  if (c.bytecode) return { t: 'code', v: serializeModuleToJson(c) };

  if (c._type === 'dict') {
    // A dict carrying bytecode is treated as a code object too.
    const embedded = c.get ? c.get('bytecode') : c.bytecode;
    if (embedded) return { t: 'code', v: serializeModuleToJson(c) };
    const fields = {};
    if (c.forEach) c.forEach((v, k) => { fields[k] = serializeConstantJson(v); });
    return { t: 'dict', v: fields };
  }

  return { t: 'nil' };
}
/**
 * Coerce a kernel value into a plain JS array.
 *
 * Accepts a real array (returned as-is), any object carrying a truthy
 * `items` property (kernel lists use { _type: 'list', items }), or anything
 * else, which yields an empty array.
 *
 * @param {*} v - value to coerce
 * @returns {Array} the underlying array, or []
 */
function extractList(v) {
  if (Array.isArray(v)) return v;
  return v && v.items ? v.items : [];
}
fs.unlinkSync(tmpFile);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long