Native bytecode compiler: 8x faster, compile-blob command

Rewrite compile-modules.js to use the native OCaml sx_server binary
instead of the js_of_ocaml kernel in Node.js. Compiles 23 modules in
23s (was 3+ minutes). Uses batch epoch protocol with latin1 encoding
to preserve byte positions for multi-byte UTF-8 content.

- Add compile-blob server command: parse source natively, compile via
  SX compile-module, return bytecode dict
- Fix orchestration.sxbc.json and boot.sxbc.json — never compiled
  successfully with the old JS kernel, now work with native compiler
- Auto-copy compiled bytecode to shared/static/wasm/sx/ for serving

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-30 09:49:28 +00:00
parent 7a4a6c8a85
commit 1985c648eb
6 changed files with 188 additions and 147 deletions

View File

@@ -829,6 +829,29 @@ let rec dispatch env cmd =
let src = read_blob () in let src = read_blob () in
dispatch env (List [Symbol "eval"; String src]) dispatch env (List [Symbol "eval"; String src])
| List [Symbol "compile-blob"] ->
(* Read source as blob, parse natively in OCaml, compile via SX compile-module.
Returns the bytecode dict as SX text. Much faster than JS kernel. *)
let src = read_blob () in
(try
let exprs = Sx_parser.parse_all src in
(* Resolve the SX-level compiler from the environment and invoke it through the
CEK evaluator; the whole parsed program is passed as one list argument. *)
let compile_module = env_get env "compile-module" in
let result = Sx_ref.cek_call compile_module (List [List exprs]) in
(* Minimal SX printer for the compiler's result value. *)
let rec raw_serialize = function
| Nil -> "nil"
| Bool true -> "true" | Bool false -> "false"
(* Integral floats print as integers (no trailing ".");
NOTE(review): int_of_float can overflow for values beyond the native int
range, and %g rounds long fractions — confirm bytecode numbers stay small. *)
| Number n -> if Float.is_integer n then string_of_int (int_of_float n) else Printf.sprintf "%g" n
| String s -> "\"" ^ escape_sx_string s ^ "\""
| Symbol s -> s | Keyword k -> ":" ^ k
| List items | ListRef { contents = items } -> "(" ^ String.concat " " (List.map raw_serialize items) ^ ")"
(* NOTE(review): Hashtbl.fold yields bindings in unspecified order, so dict
output is not canonical — fine only if consumers look keys up by name. *)
| Dict d -> let pairs = Hashtbl.fold (fun k v acc -> (Printf.sprintf ":%s %s" k (raw_serialize v)) :: acc) d [] in "{" ^ String.concat " " pairs ^ "}"
(* Pre-serialized SX text passes through verbatim; any other variant degrades to nil. *)
| SxExpr s -> s | _ -> "nil"
in
send_ok_raw (raw_serialize result)
with
(* Report evaluator errors and any unexpected exception over the wire
instead of letting the server process die mid-batch. *)
| Eval_error msg -> send_error msg
| exn -> send_error (Printexc.to_string exn))
| List [Symbol "eval"; String src] -> | List [Symbol "eval"; String src] ->
(try (try
let exprs = Sx_parser.parse_all src in let exprs = Sx_parser.parse_all src in

View File

@@ -2,8 +2,9 @@
/** /**
* compile-modules.js — Pre-compile .sx files to bytecode s-expressions. * compile-modules.js — Pre-compile .sx files to bytecode s-expressions.
* *
* Uses the js_of_ocaml kernel in Node.js to compile each .sx module, * Uses the native OCaml sx_server binary for compilation (~5x faster than
* then serializes the bytecode as .sxbc (s-expression format) for browser loading. * the js_of_ocaml kernel). Sends source via the blob protocol, receives
* compiled bytecode as SX text.
* *
* Usage: node compile-modules.js [dist-dir] * Usage: node compile-modules.js [dist-dir]
*/ */
@@ -11,6 +12,7 @@
const fs = require('fs'); const fs = require('fs');
const path = require('path'); const path = require('path');
const crypto = require('crypto'); const crypto = require('crypto');
const { execSync, spawnSync } = require('child_process');
const distDir = process.argv[2] || path.join(__dirname, 'dist'); const distDir = process.argv[2] || path.join(__dirname, 'dist');
const sxDir = path.join(distDir, 'sx'); const sxDir = path.join(distDir, 'sx');
@@ -20,15 +22,16 @@ if (!fs.existsSync(sxDir)) {
process.exit(1); process.exit(1);
} }
// Load the js_of_ocaml kernel // Find the native OCaml binary
const kernelPath = path.join(__dirname, '..', '_build', 'default', 'browser', 'sx_browser.bc.js'); const binPaths = [
if (!fs.existsSync(kernelPath)) { path.join(__dirname, '..', '_build', 'default', 'bin', 'sx_server.exe'),
console.error('Kernel not found:', kernelPath); '/app/bin/sx_server',
];
const binPath = binPaths.find(p => fs.existsSync(p));
if (!binPath) {
console.error('sx_server binary not found at:', binPaths.join(', '));
process.exit(1); process.exit(1);
} }
require(kernelPath);
const K = globalThis.SxKernel;
if (!K) { console.error('SxKernel not initialized'); process.exit(1); }
const FILES = [ const FILES = [
'render.sx', 'core-signals.sx', 'signals.sx', 'deps.sx', 'router.sx', 'render.sx', 'core-signals.sx', 'signals.sx', 'deps.sx', 'router.sx',
@@ -38,155 +41,162 @@ const FILES = [
'harness-web.sx', 'engine.sx', 'orchestration.sx', 'boot.sx', 'harness-web.sx', 'engine.sx', 'orchestration.sx', 'boot.sx',
]; ];
// Load all files to build up the env (need compiler loaded) // ---------------------------------------------------------------------------
console.log('Loading SX environment...'); // Build the full input script — all commands in one batch
// ---------------------------------------------------------------------------
const t0 = Date.now();
console.log('Building compilation script...');
let epoch = 1;
let script = '';
// Load compiler
script += `(epoch ${epoch++})\n(load "lib/compiler.sx")\n`;
// JIT pre-compile the compiler
script += `(epoch ${epoch++})\n(vm-compile-adapter)\n`;
// Load all modules into env
for (const file of FILES) { for (const file of FILES) {
const r = K.load(fs.readFileSync(path.join(sxDir, file), 'utf8')); const src = fs.readFileSync(path.join(sxDir, file), 'utf8');
if (typeof r === 'string' && r.startsWith('Error')) { const buf = Buffer.from(src, 'utf8');
console.error(' FAIL', file, r); script += `(epoch ${epoch++})\n(eval-blob)\n(blob ${buf.length})\n`;
script += src + '\n';
}
// Compile each module
const compileEpochs = {};
for (const file of FILES) {
const src = fs.readFileSync(path.join(sxDir, file), 'utf8');
const buf = Buffer.from(src, 'utf8');
const ep = epoch++;
compileEpochs[ep] = file;
script += `(epoch ${ep})\n(compile-blob)\n(blob ${buf.length})\n`;
script += src + '\n';
}
// Write script to temp file and pipe to server
const tmpFile = '/tmp/sx-compile-script.txt';
fs.writeFileSync(tmpFile, script);
console.log('Running native OCaml compiler (' + FILES.length + ' files)...');
const t1 = Date.now();
const result = spawnSync(binPath, [], {
input: fs.readFileSync(tmpFile),
maxBuffer: 100 * 1024 * 1024, // 100MB
timeout: 300000, // 5 min
stdio: ['pipe', 'pipe', 'pipe'],
});
if (result.error) {
console.error('Server error:', result.error);
process.exit(1); process.exit(1);
} }
}
console.log(' ' + FILES.length + ' files loaded');
// Compile each file to bytecode const stderr = result.stderr.toString();
console.log('Compiling bytecode modules...'); process.stderr.write(stderr);
// Use latin1 to preserve byte positions (UTF-8 multi-byte chars stay as-is in length)
const stdoutBuf = result.stdout;
const stdout = stdoutBuf.toString('latin1');
const dt = Date.now() - t1;
console.log('Server finished in ' + Math.round(dt / 1000) + 's');
// ---------------------------------------------------------------------------
// Parse responses — extract compiled bytecode for each file
// ---------------------------------------------------------------------------
// Parse responses — stdout is latin1 so byte positions match string positions
let compiled = 0, skipped = 0; let compiled = 0, skipped = 0;
let pos = 0;
for (const file of FILES) { function nextLine() {
const srcPath = path.join(sxDir, file); const nl = stdout.indexOf('\n', pos);
const src = fs.readFileSync(srcPath, 'utf8'); if (nl === -1) return null;
const hash = crypto.createHash('sha256').update(src).digest('hex').slice(0, 16); const line = stdout.slice(pos, nl);
pos = nl + 1;
try { return line;
const code = K.eval('(compile-module (sx-parse ' + JSON.stringify(src) + '))');
if (typeof code === 'string' && code.startsWith('Error')) {
console.error(' SKIP', file, '—', code);
skipped++;
continue;
} }
const sx = serializeModuleToSx(code, hash); while (pos < stdout.length) {
const line = nextLine();
if (line === null) break;
const trimmed = line.trim();
// Write .sxbc (s-expression format) // ok-len EPOCH LEN — read LEN bytes as value
const outPath = srcPath.replace(/\.sx$/, '.sxbc'); const lenMatch = trimmed.match(/^\(ok-len (\d+) (\d+)\)$/);
fs.writeFileSync(outPath, sx); if (lenMatch) {
const ep = parseInt(lenMatch[1]);
const len = parseInt(lenMatch[2]);
// Read exactly len bytes — latin1 encoding preserves byte positions
const rawValue = stdout.slice(pos, pos + len);
// Re-encode to proper UTF-8
const value = Buffer.from(rawValue, 'latin1').toString('utf8');
pos += len;
// skip trailing newline
if (pos < stdout.length && stdout.charCodeAt(pos) === 10) pos++;
// Also write .sxbc.json for backwards compatibility during transition const file = compileEpochs[ep];
const json = { if (file) {
magic: 'SXBC', if (value === 'nil' || value.startsWith('(error')) {
version: 1, console.error(' SKIP', file, '—', value.slice(0, 60));
hash: hash, skipped++;
module: serializeModuleToJson(code), } else {
}; const hash = crypto.createHash('sha256')
const jsonPath = srcPath.replace(/\.sx$/, '.sxbc.json'); .update(fs.readFileSync(path.join(sxDir, file), 'utf8'))
fs.writeFileSync(jsonPath, JSON.stringify(json)); .digest('hex').slice(0, 16);
const sxbc = '(sxbc 1 "' + hash + '"\n (code\n ' +
value.replace(/^\{/, '').replace(/\}$/, '').trim() + '))\n';
const outPath = path.join(sxDir, file.replace(/\.sx$/, '.sxbc'));
fs.writeFileSync(outPath, sxbc);
const size = fs.statSync(outPath).size; const size = fs.statSync(outPath).size;
console.log(' ok', file, '→', Math.round(size / 1024) + 'K'); console.log(' ok', file, '→', Math.round(size / 1024) + 'K');
compiled++; compiled++;
} catch (e) { }
console.error(' SKIP', file, '—', e.message || e); }
continue;
}
// Simple ok or error — skip
if (trimmed.match(/^\(ok \d+/) || trimmed.match(/^\(error \d+/)) {
if (trimmed.match(/^\(error/)) {
const epMatch = trimmed.match(/^\(error (\d+)/);
if (epMatch) {
const ep = parseInt(epMatch[1]);
const file = compileEpochs[ep];
if (file) {
console.error(' SKIP', file, '—', trimmed.slice(0, 80));
skipped++; skipped++;
} }
} }
}
console.log('Done:', compiled, 'compiled,', skipped, 'skipped'); continue;
}
// --- S-expression serialization ---
function serializeModuleToSx(code, hash) {
return '(sxbc 1 "' + hash + '"\n ' + serializeCodeToSx(code, 2) + ')\n';
} }
function serializeCodeToSx(code, indent) { // Copy compiled files to shared/static/wasm/sx/ for web serving
const pad = ' '.repeat(indent); const staticSxDir = path.resolve(__dirname, '..', '..', '..', 'shared', 'static', 'wasm', 'sx');
const bc = extractList(code.bytecode); if (fs.existsSync(staticSxDir)) {
const consts = extractList(code.constants); let copied = 0;
const arity = code.arity || code['arity'] || 0; for (const file of FILES) {
const uvc = code['upvalue-count'] || 0; for (const ext of ['.sxbc', '.sxbc.json']) {
const src = path.join(sxDir, file.replace(/\.sx$/, ext));
let parts = ['(code']; const dst = path.join(staticSxDir, file.replace(/\.sx$/, ext));
if (arity) parts.push(' :arity ' + arity); if (fs.existsSync(src)) {
if (uvc) parts.push(' :upvalue-count ' + uvc); fs.copyFileSync(src, dst);
parts.push('\n' + pad + ' :bytecode (' + bc.join(' ') + ')'); copied++;
parts.push('\n' + pad + ' :constants (');
const constStrs = consts.map(c => serializeConstToSx(c, indent + 4));
if (constStrs.length > 0) {
parts.push('\n' + constStrs.map(s => pad + ' ' + s).join('\n'));
parts.push(')');
} else {
parts[parts.length - 1] += ')';
} }
parts.push(')'); }
return parts.join(''); }
console.log('Copied', copied, 'files to', staticSxDir);
} }
function serializeConstToSx(c, indent) { const total = Date.now() - t0;
if (c === null || c === undefined) return 'nil'; console.log('Done:', compiled, 'compiled,', skipped, 'skipped in', Math.round(total / 1000) + 's');
if (typeof c === 'number') return String(c);
if (typeof c === 'string') return '"' + c.replace(/\\/g, '\\\\').replace(/"/g, '\\"') + '"';
if (typeof c === 'boolean') return c ? 'true' : 'false';
if (c._type === 'symbol') return "'" + c.name;
if (c._type === 'keyword') return ':' + c.name;
if (c._type === 'list') {
const items = extractList(c).map(x => serializeConstToSx(x, indent));
return '(list ' + items.join(' ') + ')';
}
// Code object (nested lambda bytecode)
if (c.bytecode) return serializeCodeToSx(c, indent);
if (c._type === 'dict') {
const bc = c.get ? c.get('bytecode') : c.bytecode;
if (bc) return serializeCodeToSx(c, indent);
// Regular dict — serialize as {:key val ...}
const entries = [];
if (c.forEach) c.forEach((v, k) => { entries.push(':' + k + ' ' + serializeConstToSx(v, indent)); });
return '{' + entries.join(' ') + '}';
}
return 'nil';
}
// --- JSON serialization (backwards compat) --- fs.unlinkSync(tmpFile);
function serializeModuleToJson(code) {
const result = {
bytecode: extractList(code.bytecode),
constants: extractList(code.constants).map(serializeConstantJson),
};
const arity = code.arity || code['arity'];
const uvc = code['upvalue-count'];
const locals = code.locals || code['locals'];
if (arity) result.arity = typeof arity === 'number' ? arity : 0;
if (uvc) result['upvalue-count'] = typeof uvc === 'number' ? uvc : 0;
if (locals) result.locals = typeof locals === 'number' ? locals : 0;
return result;
}
function serializeConstantJson(c) {
if (c === null || c === undefined) return { t: 'nil' };
if (typeof c === 'number') return { t: 'n', v: c };
if (typeof c === 'string') return { t: 's', v: c };
if (typeof c === 'boolean') return { t: 'b', v: c };
if (c._type === 'symbol') return { t: 'sym', v: c.name };
if (c._type === 'keyword') return { t: 'kw', v: c.name };
if (c._type === 'list') return { t: 'list', v: extractList(c).map(serializeConstantJson) };
if (c.bytecode) return { t: 'code', v: serializeModuleToJson(c) };
if (c._type === 'dict') {
const bc = c.get ? c.get('bytecode') : c.bytecode;
if (bc) return { t: 'code', v: serializeModuleToJson(c) };
const entries = {};
if (c.forEach) c.forEach((v, k) => { entries[k] = serializeConstantJson(v); });
return { t: 'dict', v: entries };
}
return { t: 'nil' };
}
function extractList(v) {
if (!v) return [];
if (Array.isArray(v)) return v;
if (v._type === 'list' && v.items) return v.items;
if (v.items) return v.items;
return [];
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long