Content-addressed on-demand loading: Merkle DAG for all browser assets

Replace the monolithic 500KB <script data-components> block with a 25KB
JSON manifest mapping names to content hashes. Every definition —
components, islands, macros, client libraries, bytecode modules, and
WASM binaries — is now content-addressed and loaded on demand.

Server (sx_server.ml):
- build_hash_index: Merkle DAG over all definitions — topological sort,
  hash leaves first, component refs become @h:{hash} in instantiated form
- /sx/h/{hash} endpoint: serves definitions with Cache-Control: immutable
- Per-page manifest in <script data-sx-manifest> with defs + modules + boot
- Client library .sx files hashed as whole units (tw.sx, tw-layout.sx, etc.)
- .sxbc modules and WASM kernel hashed individually

Browser (sx-platform.js):
- Content-addressed boot: inline script loads kernel + platform by hash
- loadDefinitionByHash: recursive dep resolution with @h: rewriting
- resolveHash: 3-tier cache (memory → localStorage → fetch /sx/h/{hash})
- __resolve-symbol extended for manifest-based component + library loading
- Cache API wrapper intercepts .wasm fetches for offline caching
- Eager pre-loading of plain symbol deps for CEK evaluator compatibility

Shell template (shell.sx):
- Monolithic <script data-components> removed
- data-sx-manifest script with full hash manifest
- Inline bootstrap replaces <script src="...?v="> with CID-based loading

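On a second visit every asset is served from the local caches, so no asset
bytes are fetched from the network. Changed content gets a new hash, so only
that item — and, through Merkle propagation, the definitions that reference
it — is refetched.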

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-14 10:14:39 +00:00
parent 2cba359fdf
commit 1d83ccba3c
4 changed files with 801 additions and 44 deletions

View File

@@ -272,6 +272,11 @@
break;
}
}
// Content-addressed boot: script loaded from /sx/h/{hash}, not /static/wasm/.
// Fall back to /static/wasm/ base URL for module-manifest.json and .sx sources.
if (!_baseUrl || _baseUrl.indexOf("/sx/h/") !== -1) {
_baseUrl = "/static/wasm/";
}
}
})();
@@ -350,19 +355,56 @@
/**
* Try loading a pre-compiled .sxbc bytecode module (SX text format).
* Uses K.loadModule which handles VM suspension (import requests).
* Content-addressed: checks localStorage by hash, fetches /sx/h/{hash} on miss.
* Returns true on success, null on failure (caller falls back to .sx source).
*/
function loadBytecodeFile(path) {
var sxbcPath = path.replace(/\.sx$/, '.sxbc');
var url = _baseUrl + sxbcPath + _sxbcCacheBust;
try {
var xhr = new XMLHttpRequest();
xhr.open("GET", url, false);
xhr.send();
if (xhr.status !== 200) return null;
var sxbcFile = sxbcPath.split('/').pop(); // e.g. "dom.sxbc"
// Content-addressed resolution: manifest → localStorage → fetch by hash
var text = null;
var manifest = loadPageManifest();
if (manifest && manifest.modules && manifest.modules[sxbcFile]) {
var hash = manifest.modules[sxbcFile];
var lsKey = "sx:h:" + hash;
try {
text = localStorage.getItem(lsKey);
} catch(e) {}
if (!text) {
// Fetch by content hash
try {
var xhr2 = new XMLHttpRequest();
xhr2.open("GET", "/sx/h/" + hash, false);
xhr2.send();
if (xhr2.status === 200) {
text = xhr2.responseText;
// Strip comment line if present
if (text.charAt(0) === ';') {
var nl = text.indexOf('\n');
if (nl >= 0) text = text.substring(nl + 1);
}
try { localStorage.setItem(lsKey, text); } catch(e) {}
}
} catch(e) {}
}
}
// Fallback: fetch by URL (pre-content-addressed path)
if (!text) {
var url = _baseUrl + sxbcPath + _sxbcCacheBust;
try {
var xhr = new XMLHttpRequest();
xhr.open("GET", url, false);
xhr.send();
if (xhr.status !== 200) return null;
text = xhr.responseText;
} catch(e) { return null; }
}
try {
// Parse the sxbc text to get the SX tree
var parsed = K.parse(xhr.responseText);
var parsed = K.parse(text);
if (!parsed || !parsed.length) return null;
var sxbc = parsed[0]; // (sxbc version hash (code ...))
if (!sxbc || sxbc._type !== "list" || !sxbc.items) return null;
@@ -626,10 +668,149 @@
return _symbolIndex;
}
// ================================================================
// Content-addressed definition loader
//
// The page manifest maps definition names to content hashes.
// When a symbol listed in the manifest is missing, we resolve its hash,
// check the in-memory cache and localStorage, fetch from /sx/h/{hash}
// if needed, then load the definition (recursively resolving @h: deps).
//
// All four variables below are shared, mutable state for the functions
// in this section.
// ================================================================
// Parsed manifest JSON, or null until loadPageManifest() succeeds.
// `defs` maps names ("~component" and plain symbols) to hashes; the .sxbc
// loader additionally reads `modules` (file name → hash) from the same object.
var _pageManifest = null; // { defs: { "~name": "hash", ... }, modules: { ... } }
// Reverse index of `defs`, filled by loadPageManifest(); used to turn
// @h:{hash} references back into names before evaluation.
var _hashToName = {}; // hash → name
var _hashCache = {}; // hash → definition text (in-memory)
// Written by loadDefinitionByHash(): "loading" while a load is in progress
// (guards against circular deps), true once K.load has succeeded.
var _loadedHashes = {}; // hash → "loading" | true
/**
 * Return the page's hash manifest, parsing it the first time it is needed.
 *
 * The manifest is read from the <script data-sx-manifest> element. On a
 * successful parse the result is memoised in _pageManifest and every
 * name → hash pair under `defs` is mirrored into _hashToName (hash → name).
 *
 * @returns {Object|null} the parsed manifest, or null when the element is
 *   absent or its content cannot be parsed (a warning is logged in the
 *   latter case and nothing is memoised, so the next call tries again).
 */
function loadPageManifest() {
  if (!_pageManifest) {
    var manifestEl = document.querySelector('script[data-sx-manifest]');
    if (!manifestEl) return null;

    try {
      _pageManifest = JSON.parse(manifestEl.textContent);
      // Build the reverse index used when rewriting @h: references.
      var nameToHash = _pageManifest.defs || {};
      Object.keys(nameToHash).forEach(function (defName) {
        _hashToName[nameToHash[defName]] = defName;
      });
    } catch (err) {
      console.warn("[sx] Failed to parse manifest:", err);
      return null;
    }
  }
  return _pageManifest;
}
/**
 * Fetch the definition text for a content hash through three tiers:
 * the in-memory _hashCache, then localStorage (key "sx:h:" + hash), then a
 * synchronous GET of /sx/h/{hash}.
 *
 * A hit in a slower tier is copied into the faster ones. localStorage
 * failures (unavailable, quota, etc.) are ignored so that storage is purely
 * a best-effort cache.
 *
 * @param {string} hash content hash of the definition
 * @returns {string|null} the definition text, or null when the server does
 *   not answer 200 or the request throws (the latter is logged)
 */
function resolveHash(hash) {
  // Tier 1: in-memory
  var inMemory = _hashCache[hash];
  if (inMemory) return inMemory;

  // Tier 2: localStorage
  var storageKey = "sx:h:" + hash;
  var stored = null;
  try {
    stored = localStorage.getItem(storageKey);
  } catch (ignored) {}
  if (stored) {
    _hashCache[hash] = stored;
    return stored;
  }

  // Tier 3: server (synchronous request)
  try {
    var req = new XMLHttpRequest();
    req.open("GET", "/sx/h/" + hash, false);
    req.send();
    if (req.status !== 200) return null;

    var text = req.responseText;
    _hashCache[hash] = text;
    try { localStorage.setItem(storageKey, text); } catch (ignored) {}
    return text;
  } catch (err) {
    console.warn("[sx] Failed to fetch hash " + hash + ":", err);
    return null;
  }
}
/**
 * Load the definition stored under `hash` into the kernel, loading whatever
 * it depends on first.
 *
 * Steps:
 *   1. Resolve the definition text via resolveHash().
 *   2. Drop a leading ";..." comment line.
 *   3. Recursively load every @h:{hash} dependency found in the text.
 *   4. Rewrite @h:{hash} references to their manifest names.
 *   5. Eagerly load every plain word in the text that is a manifest key.
 *   6. Re-insert the definition's own name (single-definition forms only).
 *   7. Hand the result to K.load.
 *
 * Failures of dependency loads are not propagated; only the outcome for
 * `hash` itself is reported.
 *
 * @param {string} hash content hash of the definition to load
 * @returns {boolean} true when the definition is loaded or its load is
 *   already in progress further up the call stack (circular dependency);
 *   false when the text cannot be resolved or K.load throws. On failure the
 *   in-progress marker is cleared so a later call retries.
 */
function loadDefinitionByHash(hash) {
  if (_loadedHashes[hash]) return true;
  // Mark in-progress immediately to prevent circular recursion
  _loadedHashes[hash] = "loading";
  var def = resolveHash(hash);
  if (!def) { delete _loadedHashes[hash]; return false; }

  // Resolved up front: both the eager pre-load scan and the name
  // re-insertion below need it.
  var name = _hashToName[hash];

  // Strip comment line (;; ~name\n) from start
  var src = def;
  if (src.charAt(0) === ';') {
    var nl = src.indexOf('\n');
    if (nl >= 0) src = src.substring(nl + 1);
  }

  // Find and recursively load @h: dependencies before loading this one
  var hashRe = /@h:([0-9a-f]{16})/g;
  var match;
  while ((match = hashRe.exec(src)) !== null) {
    var depHash = match[1];
    if (!_loadedHashes[depHash]) {
      loadDefinitionByHash(depHash);
    }
  }

  // Rewrite @h:xxx back to ~names for the SX evaluator
  var rewritten = src.replace(/@h:([0-9a-f]{16})/g, function(_m, h) {
    return _hashToName[h] || ("@h:" + h);
  });

  // Eagerly pre-load any plain manifest symbols referenced in this definition.
  // The CEK evaluator doesn't call __resolve-symbol, so deps must be present
  // before the definition is called. Scan for word boundaries matching manifest keys.
  if (_pageManifest && _pageManifest.defs) {
    var manifestDefs = _pageManifest.defs;
    var words = rewritten.match(/[a-zA-Z_][a-zA-Z0-9_?!-]*/g) || [];
    for (var wi = 0; wi < words.length; wi++) {
      var w = words[wi];
      if (w === name) continue;
      // Own keys only: words such as "constructor" or "toString" would
      // otherwise resolve to Object.prototype members and be treated as hashes.
      if (!Object.prototype.hasOwnProperty.call(manifestDefs, w)) continue;
      var wordHash = manifestDefs[w];
      if (wordHash && !_loadedHashes[wordHash]) {
        loadDefinitionByHash(wordHash);
      }
    }
  }

  // Prepend the component name back into the definition.
  // Only for single-definition forms (defcomp/defisland/defmacro) where
  // the name was stripped for hashing. Multi-define files (client libs)
  // already contain named (define name ...) forms.
  if (name) {
    var isMultiDefine = /\(define\s+[a-zA-Z]/.test(rewritten);
    if (!isMultiDefine) {
      rewritten = rewritten.replace(
        /^\((defcomp|defisland|defmacro|define)\s/,
        function(_m, kw) { return "(" + kw + " " + name + " "; }
      );
    }
  }

  try {
    K.load(rewritten);
    _loadedHashes[hash] = true;
    return true;
  } catch(e) {
    console.warn("[sx] Failed to load hash " + hash + " (" + (name || "?") + "):", e);
    // Clear the in-progress marker; leaving it would make every later call
    // report success for a definition that never loaded.
    delete _loadedHashes[hash];
    return false;
  }
}
// Register the resolve hook — called by the VM when GLOBAL_GET fails
K.registerNative("__resolve-symbol", function(args) {
var name = args[0];
if (!name) return null;
// Content-addressed resolution — components, libraries, macros
var manifest = loadPageManifest();
if (manifest && manifest.defs && manifest.defs[name]) {
var hash = manifest.defs[name];
if (!_loadedHashes[hash]) {
loadDefinitionByHash(hash);
return null; // VM re-lookups after hook
}
}
// Library-level resolution (existing path — .sxbc modules)
var idx = buildSymbolIndex();
if (!idx || !idx[name]) return null;
var lib = idx[name];

View File

@@ -7,6 +7,7 @@
(sx-css :as string?)
(component-hash :as string?)
(component-defs :as string?)
(component-manifest :as string?)
(pages-sx :as string?)
(page-sx :as string?)
(body-html :as string?)
@@ -61,11 +62,12 @@
(style
(raw!
"[data-sx-island] button,[data-sx-island] a,[data-sx-island] [role=button]{cursor:pointer}"))
(script
:type "text/sx"
:data-components true
:data-hash component-hash
(raw! (or component-defs "")))
(when
component-manifest
(script
:type "application/json"
:data-sx-manifest true
(raw! component-manifest)))
(when
init-sx
(script :type "text/sx" :data-init true (raw! init-sx)))
@@ -74,12 +76,6 @@
:type "text/sx"
:data-mount "#sx-root"
(raw! (or page-sx "")))
(<>
(script
:src (str
asset-url
"/wasm/sx_browser.bc.wasm.js?v="
(or wasm-hash "0")))
(script
:src (str asset-url "/wasm/sx-platform.js?v=" (or platform-hash "0"))
:data-sxbc-hash (or sxbc-hash "0")))))))
(script
(raw!
"\n(function(){\n var m=document.querySelector('[data-sx-manifest]');\n if(!m)return;\n var j=JSON.parse(m.textContent);\n\n // Cache API wrapper — intercept .wasm fetches for offline caching.\n if(typeof caches!=='undefined'){\n var _fetch=window.fetch;\n var CACHE='sx-wasm-v1';\n window.fetch=function(input,init){\n var url=(typeof input==='string')?input:\n (input instanceof URL)?input.href:\n (input&&input.url)||'';\n if(url.indexOf('.wasm')!==-1){\n return caches.open(CACHE).then(function(c){\n return c.match(url).then(function(r){\n if(r)return r;\n return _fetch(input,init).then(function(resp){\n if(resp.ok)c.put(url,resp.clone());\n return resp;\n });\n });\n });\n }\n return _fetch(input,init);\n };\n }\n\n // Content-addressed boot: load kernel + platform by hash\n if(!j.boot)return;\n j.boot.forEach(function(h){\n var s=document.createElement('script');\n s.src='/sx/h/'+h;\n document.head.appendChild(s);\n });\n})();\n"))))))