Native bytecode compiler: 8x faster, compile-blob command

Rewrite compile-modules.js to use the native OCaml sx_server binary
instead of the js_of_ocaml kernel in Node.js. Compiles 23 modules in
23s (was 3+ minutes). Uses batch epoch protocol with latin1 encoding
to preserve byte positions for multi-byte UTF-8 content.

- Add compile-blob server command: parse source natively, compile via
  SX compile-module, return bytecode dict
- Fix orchestration.sxbc.json and boot.sxbc.json — never compiled
  successfully with the old JS kernel, now work with native compiler
- Auto-copy compiled bytecode to shared/static/wasm/sx/ for serving

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-30 09:49:28 +00:00
parent 7a4a6c8a85
commit 1985c648eb
6 changed files with 188 additions and 147 deletions

View File

@@ -829,6 +829,29 @@ let rec dispatch env cmd =
let src = read_blob () in let src = read_blob () in
dispatch env (List [Symbol "eval"; String src]) dispatch env (List [Symbol "eval"; String src])
| List [Symbol "compile-blob"] ->
(* Read source as blob, parse natively in OCaml, compile via SX compile-module.
Returns the bytecode dict as SX text. Much faster than JS kernel. *)
let src = read_blob () in
(try
let exprs = Sx_parser.parse_all src in
let compile_module = env_get env "compile-module" in
let result = Sx_ref.cek_call compile_module (List [List exprs]) in
let rec raw_serialize = function
| Nil -> "nil"
| Bool true -> "true" | Bool false -> "false"
| Number n -> if Float.is_integer n then string_of_int (int_of_float n) else Printf.sprintf "%g" n
| String s -> "\"" ^ escape_sx_string s ^ "\""
| Symbol s -> s | Keyword k -> ":" ^ k
| List items | ListRef { contents = items } -> "(" ^ String.concat " " (List.map raw_serialize items) ^ ")"
| Dict d -> let pairs = Hashtbl.fold (fun k v acc -> (Printf.sprintf ":%s %s" k (raw_serialize v)) :: acc) d [] in "{" ^ String.concat " " pairs ^ "}"
| SxExpr s -> s | _ -> "nil"
in
send_ok_raw (raw_serialize result)
with
| Eval_error msg -> send_error msg
| exn -> send_error (Printexc.to_string exn))
| List [Symbol "eval"; String src] -> | List [Symbol "eval"; String src] ->
(try (try
let exprs = Sx_parser.parse_all src in let exprs = Sx_parser.parse_all src in

View File

@@ -2,8 +2,9 @@
/** /**
* compile-modules.js — Pre-compile .sx files to bytecode s-expressions. * compile-modules.js — Pre-compile .sx files to bytecode s-expressions.
* *
* Uses the js_of_ocaml kernel in Node.js to compile each .sx module, * Uses the native OCaml sx_server binary for compilation (~5x faster than
* then serializes the bytecode as .sxbc (s-expression format) for browser loading. * the js_of_ocaml kernel). Sends source via the blob protocol, receives
* compiled bytecode as SX text.
* *
* Usage: node compile-modules.js [dist-dir] * Usage: node compile-modules.js [dist-dir]
*/ */
@@ -11,6 +12,7 @@
const fs = require('fs'); const fs = require('fs');
const path = require('path'); const path = require('path');
const crypto = require('crypto'); const crypto = require('crypto');
const { execSync, spawnSync } = require('child_process');
const distDir = process.argv[2] || path.join(__dirname, 'dist'); const distDir = process.argv[2] || path.join(__dirname, 'dist');
const sxDir = path.join(distDir, 'sx'); const sxDir = path.join(distDir, 'sx');
@@ -20,15 +22,16 @@ if (!fs.existsSync(sxDir)) {
process.exit(1); process.exit(1);
} }
// Load the js_of_ocaml kernel // Find the native OCaml binary
const kernelPath = path.join(__dirname, '..', '_build', 'default', 'browser', 'sx_browser.bc.js'); const binPaths = [
if (!fs.existsSync(kernelPath)) { path.join(__dirname, '..', '_build', 'default', 'bin', 'sx_server.exe'),
console.error('Kernel not found:', kernelPath); '/app/bin/sx_server',
];
const binPath = binPaths.find(p => fs.existsSync(p));
if (!binPath) {
console.error('sx_server binary not found at:', binPaths.join(', '));
process.exit(1); process.exit(1);
} }
require(kernelPath);
const K = globalThis.SxKernel;
if (!K) { console.error('SxKernel not initialized'); process.exit(1); }
const FILES = [ const FILES = [
'render.sx', 'core-signals.sx', 'signals.sx', 'deps.sx', 'router.sx', 'render.sx', 'core-signals.sx', 'signals.sx', 'deps.sx', 'router.sx',
@@ -38,155 +41,162 @@ const FILES = [
'harness-web.sx', 'engine.sx', 'orchestration.sx', 'boot.sx', 'harness-web.sx', 'engine.sx', 'orchestration.sx', 'boot.sx',
]; ];
// Load all files to build up the env (need compiler loaded) // ---------------------------------------------------------------------------
console.log('Loading SX environment...'); // Build the full input script — all commands in one batch
// ---------------------------------------------------------------------------
const t0 = Date.now();
console.log('Building compilation script...');
let epoch = 1;
let script = '';
// Load compiler
script += `(epoch ${epoch++})\n(load "lib/compiler.sx")\n`;
// JIT pre-compile the compiler
script += `(epoch ${epoch++})\n(vm-compile-adapter)\n`;
// Load all modules into env
for (const file of FILES) { for (const file of FILES) {
const r = K.load(fs.readFileSync(path.join(sxDir, file), 'utf8')); const src = fs.readFileSync(path.join(sxDir, file), 'utf8');
if (typeof r === 'string' && r.startsWith('Error')) { const buf = Buffer.from(src, 'utf8');
console.error(' FAIL', file, r); script += `(epoch ${epoch++})\n(eval-blob)\n(blob ${buf.length})\n`;
process.exit(1); script += src + '\n';
}
} }
console.log(' ' + FILES.length + ' files loaded');
// Compile each file to bytecode
console.log('Compiling bytecode modules...');
let compiled = 0, skipped = 0;
// Compile each module
const compileEpochs = {};
for (const file of FILES) { for (const file of FILES) {
const srcPath = path.join(sxDir, file); const src = fs.readFileSync(path.join(sxDir, file), 'utf8');
const src = fs.readFileSync(srcPath, 'utf8'); const buf = Buffer.from(src, 'utf8');
const hash = crypto.createHash('sha256').update(src).digest('hex').slice(0, 16); const ep = epoch++;
compileEpochs[ep] = file;
script += `(epoch ${ep})\n(compile-blob)\n(blob ${buf.length})\n`;
script += src + '\n';
}
try { // Write script to temp file and pipe to server
const code = K.eval('(compile-module (sx-parse ' + JSON.stringify(src) + '))'); const tmpFile = '/tmp/sx-compile-script.txt';
fs.writeFileSync(tmpFile, script);
if (typeof code === 'string' && code.startsWith('Error')) { console.log('Running native OCaml compiler (' + FILES.length + ' files)...');
console.error(' SKIP', file, '—', code); const t1 = Date.now();
const result = spawnSync(binPath, [], {
input: fs.readFileSync(tmpFile),
maxBuffer: 100 * 1024 * 1024, // 100MB
timeout: 300000, // 5 min
stdio: ['pipe', 'pipe', 'pipe'],
});
if (result.error) {
console.error('Server error:', result.error);
process.exit(1);
}
const stderr = result.stderr.toString();
process.stderr.write(stderr);
// Use latin1 to preserve byte positions (UTF-8 multi-byte chars stay as-is in length)
const stdoutBuf = result.stdout;
const stdout = stdoutBuf.toString('latin1');
const dt = Date.now() - t1;
console.log('Server finished in ' + Math.round(dt / 1000) + 's');
// ---------------------------------------------------------------------------
// Parse responses — extract compiled bytecode for each file
// ---------------------------------------------------------------------------
// Parse responses — stdout is latin1 so byte positions match string positions
let compiled = 0, skipped = 0;
let pos = 0;
function nextLine() {
const nl = stdout.indexOf('\n', pos);
if (nl === -1) return null;
const line = stdout.slice(pos, nl);
pos = nl + 1;
return line;
}
while (pos < stdout.length) {
const line = nextLine();
if (line === null) break;
const trimmed = line.trim();
// ok-len EPOCH LEN — read LEN bytes as value
const lenMatch = trimmed.match(/^\(ok-len (\d+) (\d+)\)$/);
if (lenMatch) {
const ep = parseInt(lenMatch[1]);
const len = parseInt(lenMatch[2]);
// Read exactly len bytes — latin1 encoding preserves byte positions
const rawValue = stdout.slice(pos, pos + len);
// Re-encode to proper UTF-8
const value = Buffer.from(rawValue, 'latin1').toString('utf8');
pos += len;
// skip trailing newline
if (pos < stdout.length && stdout.charCodeAt(pos) === 10) pos++;
const file = compileEpochs[ep];
if (file) {
if (value === 'nil' || value.startsWith('(error')) {
console.error(' SKIP', file, '—', value.slice(0, 60));
skipped++; skipped++;
continue; } else {
} const hash = crypto.createHash('sha256')
.update(fs.readFileSync(path.join(sxDir, file), 'utf8'))
.digest('hex').slice(0, 16);
const sx = serializeModuleToSx(code, hash); const sxbc = '(sxbc 1 "' + hash + '"\n (code\n ' +
value.replace(/^\{/, '').replace(/\}$/, '').trim() + '))\n';
// Write .sxbc (s-expression format) const outPath = path.join(sxDir, file.replace(/\.sx$/, '.sxbc'));
const outPath = srcPath.replace(/\.sx$/, '.sxbc'); fs.writeFileSync(outPath, sxbc);
fs.writeFileSync(outPath, sx);
// Also write .sxbc.json for backwards compatibility during transition
const json = {
magic: 'SXBC',
version: 1,
hash: hash,
module: serializeModuleToJson(code),
};
const jsonPath = srcPath.replace(/\.sx$/, '.sxbc.json');
fs.writeFileSync(jsonPath, JSON.stringify(json));
const size = fs.statSync(outPath).size; const size = fs.statSync(outPath).size;
console.log(' ok', file, '→', Math.round(size / 1024) + 'K'); console.log(' ok', file, '→', Math.round(size / 1024) + 'K');
compiled++; compiled++;
} catch (e) { }
console.error(' SKIP', file, '—', e.message || e); }
continue;
}
// Simple ok or error — skip
if (trimmed.match(/^\(ok \d+/) || trimmed.match(/^\(error \d+/)) {
if (trimmed.match(/^\(error/)) {
const epMatch = trimmed.match(/^\(error (\d+)/);
if (epMatch) {
const ep = parseInt(epMatch[1]);
const file = compileEpochs[ep];
if (file) {
console.error(' SKIP', file, '—', trimmed.slice(0, 80));
skipped++; skipped++;
} }
}
console.log('Done:', compiled, 'compiled,', skipped, 'skipped');
// --- S-expression serialization ---
function serializeModuleToSx(code, hash) {
return '(sxbc 1 "' + hash + '"\n ' + serializeCodeToSx(code, 2) + ')\n';
}
function serializeCodeToSx(code, indent) {
const pad = ' '.repeat(indent);
const bc = extractList(code.bytecode);
const consts = extractList(code.constants);
const arity = code.arity || code['arity'] || 0;
const uvc = code['upvalue-count'] || 0;
let parts = ['(code'];
if (arity) parts.push(' :arity ' + arity);
if (uvc) parts.push(' :upvalue-count ' + uvc);
parts.push('\n' + pad + ' :bytecode (' + bc.join(' ') + ')');
parts.push('\n' + pad + ' :constants (');
const constStrs = consts.map(c => serializeConstToSx(c, indent + 4));
if (constStrs.length > 0) {
parts.push('\n' + constStrs.map(s => pad + ' ' + s).join('\n'));
parts.push(')');
} else {
parts[parts.length - 1] += ')';
} }
parts.push(')');
return parts.join('');
}
function serializeConstToSx(c, indent) {
if (c === null || c === undefined) return 'nil';
if (typeof c === 'number') return String(c);
if (typeof c === 'string') return '"' + c.replace(/\\/g, '\\\\').replace(/"/g, '\\"') + '"';
if (typeof c === 'boolean') return c ? 'true' : 'false';
if (c._type === 'symbol') return "'" + c.name;
if (c._type === 'keyword') return ':' + c.name;
if (c._type === 'list') {
const items = extractList(c).map(x => serializeConstToSx(x, indent));
return '(list ' + items.join(' ') + ')';
} }
// Code object (nested lambda bytecode) continue;
if (c.bytecode) return serializeCodeToSx(c, indent);
if (c._type === 'dict') {
const bc = c.get ? c.get('bytecode') : c.bytecode;
if (bc) return serializeCodeToSx(c, indent);
// Regular dict — serialize as {:key val ...}
const entries = [];
if (c.forEach) c.forEach((v, k) => { entries.push(':' + k + ' ' + serializeConstToSx(v, indent)); });
return '{' + entries.join(' ') + '}';
} }
return 'nil';
} }
// --- JSON serialization (backwards compat) --- // Copy compiled files to shared/static/wasm/sx/ for web serving
const staticSxDir = path.resolve(__dirname, '..', '..', '..', 'shared', 'static', 'wasm', 'sx');
function serializeModuleToJson(code) { if (fs.existsSync(staticSxDir)) {
const result = { let copied = 0;
bytecode: extractList(code.bytecode), for (const file of FILES) {
constants: extractList(code.constants).map(serializeConstantJson), for (const ext of ['.sxbc', '.sxbc.json']) {
}; const src = path.join(sxDir, file.replace(/\.sx$/, ext));
const arity = code.arity || code['arity']; const dst = path.join(staticSxDir, file.replace(/\.sx$/, ext));
const uvc = code['upvalue-count']; if (fs.existsSync(src)) {
const locals = code.locals || code['locals']; fs.copyFileSync(src, dst);
if (arity) result.arity = typeof arity === 'number' ? arity : 0; copied++;
if (uvc) result['upvalue-count'] = typeof uvc === 'number' ? uvc : 0;
if (locals) result.locals = typeof locals === 'number' ? locals : 0;
return result;
}
function serializeConstantJson(c) {
if (c === null || c === undefined) return { t: 'nil' };
if (typeof c === 'number') return { t: 'n', v: c };
if (typeof c === 'string') return { t: 's', v: c };
if (typeof c === 'boolean') return { t: 'b', v: c };
if (c._type === 'symbol') return { t: 'sym', v: c.name };
if (c._type === 'keyword') return { t: 'kw', v: c.name };
if (c._type === 'list') return { t: 'list', v: extractList(c).map(serializeConstantJson) };
if (c.bytecode) return { t: 'code', v: serializeModuleToJson(c) };
if (c._type === 'dict') {
const bc = c.get ? c.get('bytecode') : c.bytecode;
if (bc) return { t: 'code', v: serializeModuleToJson(c) };
const entries = {};
if (c.forEach) c.forEach((v, k) => { entries[k] = serializeConstantJson(v); });
return { t: 'dict', v: entries };
} }
return { t: 'nil' }; }
}
console.log('Copied', copied, 'files to', staticSxDir);
} }
function extractList(v) { const total = Date.now() - t0;
if (!v) return []; console.log('Done:', compiled, 'compiled,', skipped, 'skipped in', Math.round(total / 1000) + 's');
if (Array.isArray(v)) return v;
if (v._type === 'list' && v.items) return v.items; fs.unlinkSync(tmpFile);
if (v.items) return v.items;
return [];
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long