Native bytecode compiler: 8x faster, compile-blob command
Rewrite compile-modules.js to use the native OCaml sx_server binary instead of the js_of_ocaml kernel in Node.js. Compiles 23 modules in 23s (was 3+ minutes). Uses a batch epoch protocol and decodes the server output as latin1, so that byte positions are preserved for multi-byte UTF-8 content.

- Add compile-blob server command: parse source natively, compile via SX compile-module, return bytecode dict
- Fix orchestration.sxbc.json and boot.sxbc.json — these never compiled successfully with the old JS kernel and now work with the native compiler
- Auto-copy compiled bytecode to shared/static/wasm/sx/ for serving

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -829,6 +829,29 @@ let rec dispatch env cmd =
|
|||||||
let src = read_blob () in
|
let src = read_blob () in
|
||||||
dispatch env (List [Symbol "eval"; String src])
|
dispatch env (List [Symbol "eval"; String src])
|
||||||
|
|
||||||
|
| List [Symbol "compile-blob"] ->
|
||||||
|
(* Read source as blob, parse natively in OCaml, compile via SX compile-module.
|
||||||
|
Returns the bytecode dict as SX text. Much faster than JS kernel. *)
|
||||||
|
let src = read_blob () in
|
||||||
|
(try
|
||||||
|
let exprs = Sx_parser.parse_all src in
|
||||||
|
let compile_module = env_get env "compile-module" in
|
||||||
|
let result = Sx_ref.cek_call compile_module (List [List exprs]) in
|
||||||
|
let rec raw_serialize = function
|
||||||
|
| Nil -> "nil"
|
||||||
|
| Bool true -> "true" | Bool false -> "false"
|
||||||
|
| Number n -> if Float.is_integer n then string_of_int (int_of_float n) else Printf.sprintf "%g" n
|
||||||
|
| String s -> "\"" ^ escape_sx_string s ^ "\""
|
||||||
|
| Symbol s -> s | Keyword k -> ":" ^ k
|
||||||
|
| List items | ListRef { contents = items } -> "(" ^ String.concat " " (List.map raw_serialize items) ^ ")"
|
||||||
|
| Dict d -> let pairs = Hashtbl.fold (fun k v acc -> (Printf.sprintf ":%s %s" k (raw_serialize v)) :: acc) d [] in "{" ^ String.concat " " pairs ^ "}"
|
||||||
|
| SxExpr s -> s | _ -> "nil"
|
||||||
|
in
|
||||||
|
send_ok_raw (raw_serialize result)
|
||||||
|
with
|
||||||
|
| Eval_error msg -> send_error msg
|
||||||
|
| exn -> send_error (Printexc.to_string exn))
|
||||||
|
|
||||||
| List [Symbol "eval"; String src] ->
|
| List [Symbol "eval"; String src] ->
|
||||||
(try
|
(try
|
||||||
let exprs = Sx_parser.parse_all src in
|
let exprs = Sx_parser.parse_all src in
|
||||||
|
|||||||
@@ -2,8 +2,9 @@
|
|||||||
/**
|
/**
|
||||||
* compile-modules.js — Pre-compile .sx files to bytecode s-expressions.
|
* compile-modules.js — Pre-compile .sx files to bytecode s-expressions.
|
||||||
*
|
*
|
||||||
* Uses the js_of_ocaml kernel in Node.js to compile each .sx module,
|
* Uses the native OCaml sx_server binary for compilation (~8x faster than
|
||||||
* then serializes the bytecode as .sxbc (s-expression format) for browser loading.
|
* the js_of_ocaml kernel). Sends source via the blob protocol, receives
|
||||||
|
* compiled bytecode as SX text.
|
||||||
*
|
*
|
||||||
* Usage: node compile-modules.js [dist-dir]
|
* Usage: node compile-modules.js [dist-dir]
|
||||||
*/
|
*/
|
||||||
@@ -11,6 +12,7 @@
|
|||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
const path = require('path');
|
const path = require('path');
|
||||||
const crypto = require('crypto');
|
const crypto = require('crypto');
|
||||||
|
const { execSync, spawnSync } = require('child_process');
|
||||||
|
|
||||||
const distDir = process.argv[2] || path.join(__dirname, 'dist');
|
const distDir = process.argv[2] || path.join(__dirname, 'dist');
|
||||||
const sxDir = path.join(distDir, 'sx');
|
const sxDir = path.join(distDir, 'sx');
|
||||||
@@ -20,15 +22,16 @@ if (!fs.existsSync(sxDir)) {
|
|||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load the js_of_ocaml kernel
|
// Find the native OCaml binary
|
||||||
const kernelPath = path.join(__dirname, '..', '_build', 'default', 'browser', 'sx_browser.bc.js');
|
const binPaths = [
|
||||||
if (!fs.existsSync(kernelPath)) {
|
path.join(__dirname, '..', '_build', 'default', 'bin', 'sx_server.exe'),
|
||||||
console.error('Kernel not found:', kernelPath);
|
'/app/bin/sx_server',
|
||||||
|
];
|
||||||
|
const binPath = binPaths.find(p => fs.existsSync(p));
|
||||||
|
if (!binPath) {
|
||||||
|
console.error('sx_server binary not found at:', binPaths.join(', '));
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
require(kernelPath);
|
|
||||||
const K = globalThis.SxKernel;
|
|
||||||
if (!K) { console.error('SxKernel not initialized'); process.exit(1); }
|
|
||||||
|
|
||||||
const FILES = [
|
const FILES = [
|
||||||
'render.sx', 'core-signals.sx', 'signals.sx', 'deps.sx', 'router.sx',
|
'render.sx', 'core-signals.sx', 'signals.sx', 'deps.sx', 'router.sx',
|
||||||
@@ -38,155 +41,162 @@ const FILES = [
|
|||||||
'harness-web.sx', 'engine.sx', 'orchestration.sx', 'boot.sx',
|
'harness-web.sx', 'engine.sx', 'orchestration.sx', 'boot.sx',
|
||||||
];
|
];
|
||||||
|
|
||||||
// Load all files to build up the env (need compiler loaded)
|
// ---------------------------------------------------------------------------
|
||||||
console.log('Loading SX environment...');
|
// Build the full input script — all commands in one batch
|
||||||
for (const file of FILES) {
|
// ---------------------------------------------------------------------------
|
||||||
const r = K.load(fs.readFileSync(path.join(sxDir, file), 'utf8'));
|
|
||||||
if (typeof r === 'string' && r.startsWith('Error')) {
|
|
||||||
console.error(' FAIL', file, r);
|
|
||||||
process.exit(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
console.log(' ' + FILES.length + ' files loaded');
|
|
||||||
|
|
||||||
// Compile each file to bytecode
|
const t0 = Date.now();
|
||||||
console.log('Compiling bytecode modules...');
|
console.log('Building compilation script...');
|
||||||
|
|
||||||
|
let epoch = 1;
|
||||||
|
let script = '';
|
||||||
|
|
||||||
|
// Load compiler
|
||||||
|
script += `(epoch ${epoch++})\n(load "lib/compiler.sx")\n`;
|
||||||
|
|
||||||
|
// JIT pre-compile the compiler
|
||||||
|
script += `(epoch ${epoch++})\n(vm-compile-adapter)\n`;
|
||||||
|
|
||||||
|
// Load all modules into env
|
||||||
|
for (const file of FILES) {
|
||||||
|
const src = fs.readFileSync(path.join(sxDir, file), 'utf8');
|
||||||
|
const buf = Buffer.from(src, 'utf8');
|
||||||
|
script += `(epoch ${epoch++})\n(eval-blob)\n(blob ${buf.length})\n`;
|
||||||
|
script += src + '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compile each module
|
||||||
|
const compileEpochs = {};
|
||||||
|
for (const file of FILES) {
|
||||||
|
const src = fs.readFileSync(path.join(sxDir, file), 'utf8');
|
||||||
|
const buf = Buffer.from(src, 'utf8');
|
||||||
|
const ep = epoch++;
|
||||||
|
compileEpochs[ep] = file;
|
||||||
|
script += `(epoch ${ep})\n(compile-blob)\n(blob ${buf.length})\n`;
|
||||||
|
script += src + '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write script to temp file and pipe to server
|
||||||
|
const tmpFile = '/tmp/sx-compile-script.txt';
|
||||||
|
fs.writeFileSync(tmpFile, script);
|
||||||
|
|
||||||
|
console.log('Running native OCaml compiler (' + FILES.length + ' files)...');
|
||||||
|
const t1 = Date.now();
|
||||||
|
|
||||||
|
const result = spawnSync(binPath, [], {
|
||||||
|
input: fs.readFileSync(tmpFile),
|
||||||
|
maxBuffer: 100 * 1024 * 1024, // 100MB
|
||||||
|
timeout: 300000, // 5 min
|
||||||
|
stdio: ['pipe', 'pipe', 'pipe'],
|
||||||
|
});
|
||||||
|
|
||||||
|
if (result.error) {
|
||||||
|
console.error('Server error:', result.error);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
const stderr = result.stderr.toString();
|
||||||
|
process.stderr.write(stderr);
|
||||||
|
|
||||||
|
// Decode stdout as latin1: every byte becomes exactly one string character, so string offsets equal byte offsets even when the payload contains multi-byte UTF-8
|
||||||
|
const stdoutBuf = result.stdout;
|
||||||
|
const stdout = stdoutBuf.toString('latin1');
|
||||||
|
const dt = Date.now() - t1;
|
||||||
|
console.log('Server finished in ' + Math.round(dt / 1000) + 's');
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Parse responses — extract compiled bytecode for each file
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Parse responses — stdout is latin1 so byte positions match string positions
|
||||||
let compiled = 0, skipped = 0;
|
let compiled = 0, skipped = 0;
|
||||||
|
let pos = 0;
|
||||||
|
|
||||||
for (const file of FILES) {
|
function nextLine() {
|
||||||
const srcPath = path.join(sxDir, file);
|
const nl = stdout.indexOf('\n', pos);
|
||||||
const src = fs.readFileSync(srcPath, 'utf8');
|
if (nl === -1) return null;
|
||||||
const hash = crypto.createHash('sha256').update(src).digest('hex').slice(0, 16);
|
const line = stdout.slice(pos, nl);
|
||||||
|
pos = nl + 1;
|
||||||
|
return line;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
while (pos < stdout.length) {
|
||||||
const code = K.eval('(compile-module (sx-parse ' + JSON.stringify(src) + '))');
|
const line = nextLine();
|
||||||
|
if (line === null) break;
|
||||||
|
const trimmed = line.trim();
|
||||||
|
|
||||||
if (typeof code === 'string' && code.startsWith('Error')) {
|
// ok-len EPOCH LEN — read LEN bytes as value
|
||||||
console.error(' SKIP', file, '—', code);
|
const lenMatch = trimmed.match(/^\(ok-len (\d+) (\d+)\)$/);
|
||||||
skipped++;
|
if (lenMatch) {
|
||||||
continue;
|
const ep = parseInt(lenMatch[1]);
|
||||||
|
const len = parseInt(lenMatch[2]);
|
||||||
|
// Read exactly len bytes — latin1 encoding preserves byte positions
|
||||||
|
const rawValue = stdout.slice(pos, pos + len);
|
||||||
|
// Re-encode to proper UTF-8
|
||||||
|
const value = Buffer.from(rawValue, 'latin1').toString('utf8');
|
||||||
|
pos += len;
|
||||||
|
// skip trailing newline
|
||||||
|
if (pos < stdout.length && stdout.charCodeAt(pos) === 10) pos++;
|
||||||
|
|
||||||
|
const file = compileEpochs[ep];
|
||||||
|
if (file) {
|
||||||
|
if (value === 'nil' || value.startsWith('(error')) {
|
||||||
|
console.error(' SKIP', file, '—', value.slice(0, 60));
|
||||||
|
skipped++;
|
||||||
|
} else {
|
||||||
|
const hash = crypto.createHash('sha256')
|
||||||
|
.update(fs.readFileSync(path.join(sxDir, file), 'utf8'))
|
||||||
|
.digest('hex').slice(0, 16);
|
||||||
|
|
||||||
|
const sxbc = '(sxbc 1 "' + hash + '"\n (code\n ' +
|
||||||
|
value.replace(/^\{/, '').replace(/\}$/, '').trim() + '))\n';
|
||||||
|
|
||||||
|
const outPath = path.join(sxDir, file.replace(/\.sx$/, '.sxbc'));
|
||||||
|
fs.writeFileSync(outPath, sxbc);
|
||||||
|
|
||||||
|
const size = fs.statSync(outPath).size;
|
||||||
|
console.log(' ok', file, '→', Math.round(size / 1024) + 'K');
|
||||||
|
compiled++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const sx = serializeModuleToSx(code, hash);
|
// Simple ok or error — skip
|
||||||
|
if (trimmed.match(/^\(ok \d+/) || trimmed.match(/^\(error \d+/)) {
|
||||||
// Write .sxbc (s-expression format)
|
if (trimmed.match(/^\(error/)) {
|
||||||
const outPath = srcPath.replace(/\.sx$/, '.sxbc');
|
const epMatch = trimmed.match(/^\(error (\d+)/);
|
||||||
fs.writeFileSync(outPath, sx);
|
if (epMatch) {
|
||||||
|
const ep = parseInt(epMatch[1]);
|
||||||
// Also write .sxbc.json for backwards compatibility during transition
|
const file = compileEpochs[ep];
|
||||||
const json = {
|
if (file) {
|
||||||
magic: 'SXBC',
|
console.error(' SKIP', file, '—', trimmed.slice(0, 80));
|
||||||
version: 1,
|
skipped++;
|
||||||
hash: hash,
|
}
|
||||||
module: serializeModuleToJson(code),
|
}
|
||||||
};
|
}
|
||||||
const jsonPath = srcPath.replace(/\.sx$/, '.sxbc.json');
|
continue;
|
||||||
fs.writeFileSync(jsonPath, JSON.stringify(json));
|
|
||||||
|
|
||||||
const size = fs.statSync(outPath).size;
|
|
||||||
console.log(' ok', file, '→', Math.round(size / 1024) + 'K');
|
|
||||||
compiled++;
|
|
||||||
} catch (e) {
|
|
||||||
console.error(' SKIP', file, '—', e.message || e);
|
|
||||||
skipped++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('Done:', compiled, 'compiled,', skipped, 'skipped');
|
// Copy compiled files to shared/static/wasm/sx/ for web serving
|
||||||
|
const staticSxDir = path.resolve(__dirname, '..', '..', '..', 'shared', 'static', 'wasm', 'sx');
|
||||||
// --- S-expression serialization ---
|
if (fs.existsSync(staticSxDir)) {
|
||||||
|
let copied = 0;
|
||||||
function serializeModuleToSx(code, hash) {
|
for (const file of FILES) {
|
||||||
return '(sxbc 1 "' + hash + '"\n ' + serializeCodeToSx(code, 2) + ')\n';
|
for (const ext of ['.sxbc', '.sxbc.json']) {
|
||||||
}
|
const src = path.join(sxDir, file.replace(/\.sx$/, ext));
|
||||||
|
const dst = path.join(staticSxDir, file.replace(/\.sx$/, ext));
|
||||||
function serializeCodeToSx(code, indent) {
|
if (fs.existsSync(src)) {
|
||||||
const pad = ' '.repeat(indent);
|
fs.copyFileSync(src, dst);
|
||||||
const bc = extractList(code.bytecode);
|
copied++;
|
||||||
const consts = extractList(code.constants);
|
}
|
||||||
const arity = code.arity || code['arity'] || 0;
|
}
|
||||||
const uvc = code['upvalue-count'] || 0;
|
|
||||||
|
|
||||||
let parts = ['(code'];
|
|
||||||
if (arity) parts.push(' :arity ' + arity);
|
|
||||||
if (uvc) parts.push(' :upvalue-count ' + uvc);
|
|
||||||
parts.push('\n' + pad + ' :bytecode (' + bc.join(' ') + ')');
|
|
||||||
parts.push('\n' + pad + ' :constants (');
|
|
||||||
|
|
||||||
const constStrs = consts.map(c => serializeConstToSx(c, indent + 4));
|
|
||||||
if (constStrs.length > 0) {
|
|
||||||
parts.push('\n' + constStrs.map(s => pad + ' ' + s).join('\n'));
|
|
||||||
parts.push(')');
|
|
||||||
} else {
|
|
||||||
parts[parts.length - 1] += ')';
|
|
||||||
}
|
}
|
||||||
parts.push(')');
|
console.log('Copied', copied, 'files to', staticSxDir);
|
||||||
return parts.join('');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function serializeConstToSx(c, indent) {
|
const total = Date.now() - t0;
|
||||||
if (c === null || c === undefined) return 'nil';
|
console.log('Done:', compiled, 'compiled,', skipped, 'skipped in', Math.round(total / 1000) + 's');
|
||||||
if (typeof c === 'number') return String(c);
|
|
||||||
if (typeof c === 'string') return '"' + c.replace(/\\/g, '\\\\').replace(/"/g, '\\"') + '"';
|
|
||||||
if (typeof c === 'boolean') return c ? 'true' : 'false';
|
|
||||||
if (c._type === 'symbol') return "'" + c.name;
|
|
||||||
if (c._type === 'keyword') return ':' + c.name;
|
|
||||||
if (c._type === 'list') {
|
|
||||||
const items = extractList(c).map(x => serializeConstToSx(x, indent));
|
|
||||||
return '(list ' + items.join(' ') + ')';
|
|
||||||
}
|
|
||||||
// Code object (nested lambda bytecode)
|
|
||||||
if (c.bytecode) return serializeCodeToSx(c, indent);
|
|
||||||
if (c._type === 'dict') {
|
|
||||||
const bc = c.get ? c.get('bytecode') : c.bytecode;
|
|
||||||
if (bc) return serializeCodeToSx(c, indent);
|
|
||||||
// Regular dict — serialize as {:key val ...}
|
|
||||||
const entries = [];
|
|
||||||
if (c.forEach) c.forEach((v, k) => { entries.push(':' + k + ' ' + serializeConstToSx(v, indent)); });
|
|
||||||
return '{' + entries.join(' ') + '}';
|
|
||||||
}
|
|
||||||
return 'nil';
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- JSON serialization (backwards compat) ---
|
fs.unlinkSync(tmpFile);
|
||||||
|
|
||||||
function serializeModuleToJson(code) {
|
|
||||||
const result = {
|
|
||||||
bytecode: extractList(code.bytecode),
|
|
||||||
constants: extractList(code.constants).map(serializeConstantJson),
|
|
||||||
};
|
|
||||||
const arity = code.arity || code['arity'];
|
|
||||||
const uvc = code['upvalue-count'];
|
|
||||||
const locals = code.locals || code['locals'];
|
|
||||||
if (arity) result.arity = typeof arity === 'number' ? arity : 0;
|
|
||||||
if (uvc) result['upvalue-count'] = typeof uvc === 'number' ? uvc : 0;
|
|
||||||
if (locals) result.locals = typeof locals === 'number' ? locals : 0;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
function serializeConstantJson(c) {
|
|
||||||
if (c === null || c === undefined) return { t: 'nil' };
|
|
||||||
if (typeof c === 'number') return { t: 'n', v: c };
|
|
||||||
if (typeof c === 'string') return { t: 's', v: c };
|
|
||||||
if (typeof c === 'boolean') return { t: 'b', v: c };
|
|
||||||
if (c._type === 'symbol') return { t: 'sym', v: c.name };
|
|
||||||
if (c._type === 'keyword') return { t: 'kw', v: c.name };
|
|
||||||
if (c._type === 'list') return { t: 'list', v: extractList(c).map(serializeConstantJson) };
|
|
||||||
if (c.bytecode) return { t: 'code', v: serializeModuleToJson(c) };
|
|
||||||
if (c._type === 'dict') {
|
|
||||||
const bc = c.get ? c.get('bytecode') : c.bytecode;
|
|
||||||
if (bc) return { t: 'code', v: serializeModuleToJson(c) };
|
|
||||||
const entries = {};
|
|
||||||
if (c.forEach) c.forEach((v, k) => { entries[k] = serializeConstantJson(v); });
|
|
||||||
return { t: 'dict', v: entries };
|
|
||||||
}
|
|
||||||
return { t: 'nil' };
|
|
||||||
}
|
|
||||||
|
|
||||||
function extractList(v) {
|
|
||||||
if (!v) return [];
|
|
||||||
if (Array.isArray(v)) return v;
|
|
||||||
if (v._type === 'list' && v.items) return v.items;
|
|
||||||
if (v.items) return v.items;
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|||||||
3
shared/static/wasm/sx/boot.sxbc
Normal file
3
shared/static/wasm/sx/boot.sxbc
Normal file
File diff suppressed because one or more lines are too long
1
shared/static/wasm/sx/boot.sxbc.json
Normal file
1
shared/static/wasm/sx/boot.sxbc.json
Normal file
File diff suppressed because one or more lines are too long
3
shared/static/wasm/sx/orchestration.sxbc
Normal file
3
shared/static/wasm/sx/orchestration.sxbc
Normal file
File diff suppressed because one or more lines are too long
1
shared/static/wasm/sx/orchestration.sxbc.json
Normal file
1
shared/static/wasm/sx/orchestration.sxbc.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user