diff --git a/hosts/ocaml/bin/sx_server.ml b/hosts/ocaml/bin/sx_server.ml index a3b048f3..e9ce1e65 100644 --- a/hosts/ocaml/bin/sx_server.ml +++ b/hosts/ocaml/bin/sx_server.ml @@ -188,6 +188,56 @@ let rec serialize_value = function "(make-spread {" ^ String.concat " " items ^ "})" | _ -> "nil" +(** Collect all ~-prefixed symbol references from an AST value. + Walks the tree recursively, returns a deduplicated list of symbol names + like ["~card"; "~layout/base"]. Used for dependency analysis. *) +let collect_tilde_refs body = + let seen = Hashtbl.create 16 in + let rec walk = function + | Symbol s when String.length s > 0 && s.[0] = '~' -> + if not (Hashtbl.mem seen s) then Hashtbl.replace seen s () + | List items | ListRef { contents = items } -> + List.iter walk items + | Dict d -> + Hashtbl.iter (fun _k v -> walk v) d + | Spread pairs -> + List.iter (fun (_k, v) -> walk v) pairs + | _ -> () + in + walk body; + Hashtbl.fold (fun k () acc -> k :: acc) seen [] + +(** Serialize a value to SX text, replacing ~-prefixed symbol references + with their content hashes from the index. Symbols not in the index + are emitted verbatim (unknown ref or non-component symbol). *) +let rec serialize_value_hashed (index : (string, string) Hashtbl.t) = function + | Nil -> "nil" + | Bool true -> "true" + | Bool false -> "false" + | Number n -> + if Float.is_integer n then string_of_int (int_of_float n) + else Printf.sprintf "%g" n + | String s -> "\"" ^ escape_sx_string s ^ "\"" + | Symbol s when String.length s > 0 && s.[0] = '~' -> + (match Hashtbl.find_opt index s with + | Some h -> "@h:" ^ h + | None -> s) + | Symbol s -> s + | Keyword k -> ":" ^ k + | List items | ListRef { contents = items } -> + "(" ^ String.concat " " (List.map (serialize_value_hashed index) items) ^ ")" + | Dict d -> + let pairs = Hashtbl.fold (fun k v acc -> + (Printf.sprintf ":%s %s" k (serialize_value_hashed index v)) :: acc) d [] in + "{" ^ String.concat " " (List.sort String.compare pairs) ^ "}" + | RawHTML s -> "\"" ^ escape_sx_string s ^ "\"" + | SxExpr s -> s + | Spread pairs -> + let items = List.map (fun (k, v) -> + Printf.sprintf ":%s %s" k (serialize_value_hashed index v)) pairs in + "(make-spread {" ^ String.concat " " items ^ "})" + | _ -> "nil" + (** Request epoch — monotonically increasing, set by (epoch N) from Python. All responses are tagged with the current epoch so Python can discard stale messages from previous requests. Makes pipe desync impossible. *) @@ -1151,6 +1201,41 @@ let rebind_host_extensions env = | _ -> raise (Eval_error "register-special-form!: expected (name handler)"))); ignore (env_bind env "*custom-special-forms*" Sx_ref.custom_special_forms) +(* Path-based naming for unnamed definitions *) +(* ====================================================================== *) + +let def_keywords = ["defcomp"; "defisland"; "defmacro"; "define"; + "defhandler"; "defstyle"; "deftype"; "defeffect"; + "defrelation"; "deftest"; "defpage"] + +(* Inject path-derived name into unnamed definitions. + (defcomp (params) body) -> (defcomp ~path/name (params) body) + Only applied when base_dir is provided (service components). *) +let inject_path_name expr path base_dir = + match expr with + | List (Symbol kw :: rest) when List.mem kw def_keywords -> + (match rest with + | Symbol _ :: _ -> expr (* Already named *) + | _ -> + (* Unnamed — derive name from file path relative to base_dir *) + let rel = if String.length path > String.length base_dir + 1 + then String.sub path (String.length base_dir + 1) + (String.length path - String.length base_dir - 1) + else Filename.basename path in + let stem = if Filename.check_suffix rel ".sx" + then String.sub rel 0 (String.length rel - 3) + else rel in + (* index files are known by their directory *) + let name = if Filename.basename stem = "index" + then let d = Filename.dirname stem in + if d = "." then "index" else d + else stem in + (* Components/islands get ~ prefix *) + let prefixed = if kw = "defcomp" || kw = "defisland" + then "~" ^ name else name in + List (Symbol kw :: Symbol prefixed :: rest)) + | _ -> expr + (* Command dispatch *) (* ====================================================================== *) @@ -1159,14 +1244,18 @@ let rec dispatch env cmd = | List [Symbol "ping"] -> send_ok_string "ocaml-cek" - | List [Symbol "load"; String path] -> + | List [Symbol "load"; String path] + | List [Symbol "load"; String path; String _] -> + let base_dir = match cmd with + | List [_; _; String b] -> b | _ -> "" in (try let exprs = Sx_parser.parse_file path in let prev_file = if Sx_types.env_has env "*current-file*" then Some (Sx_types.env_get env "*current-file*") else None in ignore (Sx_types.env_bind env "*current-file*" (String path)); let count = ref 0 in List.iter (fun expr -> - (try ignore (eval_expr_io expr (Env env)) + let expr' = if base_dir <> "" then inject_path_name expr path base_dir else expr in + (try ignore (eval_expr_io expr' (Env env)) with Eval_error msg -> Printf.eprintf "[load] %s: %s\n%!" (Filename.basename path) msg); incr count @@ -2100,6 +2189,7 @@ let http_render_page env path headers = Keyword "body-html"; String body_html; Keyword "component-defs"; get_shell "component-defs"; Keyword "component-hash"; get_shell "component-hash"; + Keyword "component-manifest"; get_shell "component-manifest"; Keyword "pages-sx"; get_shell "pages-sx"; Keyword "sx-css"; get_shell "sx-css"; Keyword "asset-url"; get_shell "asset-url"; @@ -2275,6 +2365,8 @@ let http_render_page_streaming env path _headers fd page_name = Keyword "body-html"; String body_html; Keyword "component-defs"; get_shell "component-defs"; Keyword "component-hash"; get_shell "component-hash"; + Keyword "component-manifest"; get_shell "component-manifest"; + Keyword "client-libs"; get_shell "client-libs"; Keyword "pages-sx"; get_shell "pages-sx"; Keyword "sx-css"; get_shell "sx-css"; Keyword "asset-url"; get_shell "asset-url"; @@ -2494,6 +2586,198 @@ let read_css_file path = In_channel.with_open_text path In_channel.input_all else "" +(* ── Content-addressed hash index ──────────────────────────────── + Merkle DAG over all definitions in env.bindings. + Each definition gets a SHA-256 hash of its *instantiated* form + (component references replaced with their hashes). *) + +type hash_index = { + name_to_hash : (string, string) Hashtbl.t; (** "~card" → "a1b2c3..." *) + hash_to_def : (string, string) Hashtbl.t; (** hash → instantiated definition text *) + hash_to_name : (string, string) Hashtbl.t; (** hash → "~card" *) + dependents : (string, string list) Hashtbl.t; (** "~card" → ["~my-page", ...] *) +} [@@warning "-69"] + +let _hash_index : hash_index option ref = ref None + +(** Canonical form for hashing — name excluded (it's the key, not content). + Includes params, affinity, has_children, and body with refs hashed. *) +let canonical_form_component (index : (string, string) Hashtbl.t) c = + let ps = String.concat " " ( + "&key" :: c.c_params @ + (if c.c_has_children then ["&rest"; "children"] else [])) in + Printf.sprintf "(defcomp (%s) :affinity \"%s\" %s)" + ps c.c_affinity (serialize_value_hashed index c.c_body) + +let canonical_form_island (index : (string, string) Hashtbl.t) i = + let ps = String.concat " " ( + "&key" :: i.i_params @ + (if i.i_has_children then ["&rest"; "children"] else [])) in + Printf.sprintf "(defisland (%s) %s)" + ps (serialize_value_hashed index i.i_body) + +let canonical_form_macro (index : (string, string) Hashtbl.t) m = + let ps = String.concat " " ( + m.m_params @ + (match m.m_rest_param with Some r -> ["&rest"; r] | None -> [])) in + Printf.sprintf "(defmacro (%s) %s)" + ps (serialize_value_hashed index m.m_body) + + +(** Compute truncated SHA-256 hash (16 hex chars = 64 bits). *) +let hash_string s = + String.sub (Digest.string s |> Digest.to_hex) 0 16 + +(** Build the Merkle hash index from env.bindings. + Topological sort: hash leaves first (no ~deps), propagate up. *) +let build_hash_index env = + let name_to_hash = Hashtbl.create 256 in + let hash_to_def = Hashtbl.create 256 in + let hash_to_name = Hashtbl.create 256 in + let dependents = Hashtbl.create 256 in + + (* Phase 0: hash client library source files as whole units. + Each file gets one hash. All (define name ...) forms in the file + map to that hash so any symbol triggers loading the whole file. *) + let project_dir = try Sys.getenv "SX_PROJECT_DIR" with Not_found -> "." in + let templates_dir = project_dir ^ "/shared/sx/templates" in + let client_lib_names = get_app_list "client-libs" ["tw-layout.sx"; "tw-type.sx"; "tw.sx"] in + List.iter (fun lib_name -> + let path = templates_dir ^ "/" ^ lib_name in + if Sys.file_exists path then begin + let src = In_channel.with_open_text path In_channel.input_all in + let h = hash_string src in + Hashtbl.replace hash_to_def h src; + (* Extract all (define name ...) forms and map each name to this hash *) + let exprs = try Sx_parser.parse_all src with _ -> [] in + let first_name = ref "" in + List.iter (fun expr -> + match expr with + | List (Symbol "define" :: Symbol name :: _) -> + Hashtbl.replace name_to_hash name h; + if !first_name = "" then first_name := name + | _ -> () + ) exprs; + (* Map the hash to the first define name for debuggability *) + if !first_name <> "" then + Hashtbl.replace hash_to_name h !first_name + end + ) client_lib_names; + + (* Phase 1: collect all component/island/macro definitions and their direct deps *) + let defs : (string * [ `Comp of component | `Island of island + | `Macro of macro ]) list ref = ref [] in + let deps : (string, string list) Hashtbl.t = Hashtbl.create 256 in + + Hashtbl.iter (fun sym v -> + let name = Sx_types.unintern sym in + match v with + | Component c when String.length name > 0 && name.[0] = '~' -> + let refs = collect_tilde_refs c.c_body in + defs := (name, `Comp c) :: !defs; + Hashtbl.replace deps name refs; + (* Register reverse deps *) + List.iter (fun dep -> + let prev = try Hashtbl.find dependents dep with Not_found -> [] in + Hashtbl.replace dependents dep (name :: prev) + ) refs + | Island i when String.length name > 0 && name.[0] = '~' -> + let refs = collect_tilde_refs i.i_body in + defs := (name, `Island i) :: !defs; + Hashtbl.replace deps name refs; + List.iter (fun dep -> + let prev = try Hashtbl.find dependents dep with Not_found -> [] in + Hashtbl.replace dependents dep (name :: prev) + ) refs + | Macro m when (match m.m_name with Some n -> String.length n > 0 | None -> false) -> + let refs = collect_tilde_refs m.m_body in + let mname = match m.m_name with Some n -> n | None -> name in + defs := (mname, `Macro m) :: !defs; + Hashtbl.replace deps mname refs; + List.iter (fun dep -> + let prev = try Hashtbl.find dependents dep with Not_found -> [] in + Hashtbl.replace dependents dep (mname :: prev) + ) refs + | _ -> () + ) env.bindings; + + (* Phase 2: Kahn's topological sort *) + let all_names = Hashtbl.create 256 in + List.iter (fun (name, _) -> Hashtbl.replace all_names name true) !defs; + (* In-degree: count how many of this def's deps are also in our set *) + let in_degree = Hashtbl.create 256 in + List.iter (fun (name, _) -> + let d = try Hashtbl.find deps name with Not_found -> [] in + let count = List.length (List.filter (fun dep -> Hashtbl.mem all_names dep) d) in + Hashtbl.replace in_degree name count + ) !defs; + + (* Queue: all defs with in-degree 0 (leaves) *) + let queue = Queue.create () in + List.iter (fun (name, _) -> + if Hashtbl.find in_degree name = 0 then Queue.push name queue + ) !defs; + + (* Lookup map for defs by name *) + let def_map = Hashtbl.create 256 in + List.iter (fun (name, def) -> Hashtbl.replace def_map name def) !defs; + + let processed = ref 0 in + + (* Phase 3: process in topological order *) + while not (Queue.is_empty queue) do + let name = Queue.pop queue in + incr processed; + (* All deps of this def are already hashed — compute canonical form *) + let canonical = match Hashtbl.find_opt def_map name with + | Some (`Comp c) -> canonical_form_component name_to_hash c + | Some (`Island i) -> canonical_form_island name_to_hash i + | Some (`Macro m) -> canonical_form_macro name_to_hash m + | None -> "" + in + if canonical <> "" then begin + let h = hash_string canonical in + Hashtbl.replace name_to_hash name h; + Hashtbl.replace hash_to_def h canonical; + Hashtbl.replace hash_to_name h name + end; + (* Decrease in-degree of dependents, enqueue if zero *) + let rev_deps = try Hashtbl.find dependents name with Not_found -> [] in + List.iter (fun dep_name -> + if Hashtbl.mem in_degree dep_name then begin + let d = Hashtbl.find in_degree dep_name in + Hashtbl.replace in_degree dep_name (d - 1); + if d - 1 = 0 then Queue.push dep_name queue + end + ) rev_deps + done; + + (* Any remaining defs with in-degree > 0 have circular deps — hash without ref replacement *) + if !processed < List.length !defs then begin + List.iter (fun (name, _) -> + if not (Hashtbl.mem name_to_hash name) then begin + let canonical = match Hashtbl.find_opt def_map name with + | Some (`Comp c) -> canonical_form_component name_to_hash c + | Some (`Island i) -> canonical_form_island name_to_hash i + | Some (`Macro m) -> canonical_form_macro name_to_hash m + | None -> "" + in + if canonical <> "" then begin + let h = hash_string canonical in + Hashtbl.replace name_to_hash name h; + Hashtbl.replace hash_to_def h canonical; + Hashtbl.replace hash_to_name h name + end + end + ) !defs + end; + + let idx = { name_to_hash; hash_to_def; hash_to_name; dependents } in + Printf.eprintf "[hash-index] %d definitions, %d hashes\n%!" + (List.length !defs) (Hashtbl.length name_to_hash); + _hash_index := Some idx; + idx + (** Pre-compute shell statics and inject into env as __shell-* vars. *) let http_inject_shell_statics env static_dir sx_sxc = @@ -2557,6 +2841,78 @@ let http_inject_shell_statics env static_dir sx_sxc = read_css_file (static_dir ^ "/styles/" ^ name)) css_file_names) in ignore (env_bind env "__shell-component-defs" (String component_defs)); ignore (env_bind env "__shell-component-hash" (String component_hash)); + (* Build content-addressed hash index *) + let hidx = build_hash_index env in + (* Hash each .sxbc module individually and add to the hash index. + Each module's content is stored by hash; exported symbols map to the module hash. *) + let sxbc_dir = static_dir ^ "/wasm/sx" in + let module_manifest_path = sxbc_dir ^ "/module-manifest.json" in + let module_hashes : (string, string) Hashtbl.t = Hashtbl.create 32 in (* module key → hash *) + (if Sys.file_exists module_manifest_path then begin + let manifest_src = In_channel.with_open_text module_manifest_path In_channel.input_all in + (* Simple JSON parse — extract "key": { "file": "...", "exports": [...] } *) + let exprs = try Sx_parser.parse_all ("(" ^ manifest_src ^ ")") with _ -> [] in + ignore exprs; (* The manifest is JSON, not SX — parse it manually *) + (* Read each .sxbc file, hash it, store in hash_to_def *) + if Sys.file_exists sxbc_dir && Sys.is_directory sxbc_dir then begin + let files = Array.to_list (Sys.readdir sxbc_dir) in + let sxbc_files = List.filter (fun f -> Filename.check_suffix f ".sxbc") files in + List.iter (fun fname -> + let fpath = sxbc_dir ^ "/" ^ fname in + let content = In_channel.with_open_bin fpath In_channel.input_all in + let h = hash_string content in + Hashtbl.replace hidx.hash_to_def h content; + Hashtbl.replace hidx.hash_to_name h fname; + (* Map filename (without ext) to hash for the modules section *) + Hashtbl.replace module_hashes fname h + ) sxbc_files + end + end); + (* Hash the WASM bootstrap files — these are the kernel scripts + that must load before anything else. *) + let wasm_dir = static_dir ^ "/wasm" in + let boot_files = ["sx_browser.bc.wasm.js"; "sx-platform.js"] in + let boot_hashes = List.filter_map (fun fname -> + let fpath = wasm_dir ^ "/" ^ fname in + if Sys.file_exists fpath then begin + let content = In_channel.with_open_bin fpath In_channel.input_all in + let h = hash_string content in + Hashtbl.replace hidx.hash_to_def h content; + Hashtbl.replace hidx.hash_to_name h fname; + Some (fname, h) + end else None + ) boot_files in + Printf.eprintf "[hash-index] %d module hashes, %d boot hashes\n%!" + (Hashtbl.length module_hashes) (List.length boot_hashes); + (* Build full manifest JSON: + {"v":1,"defs":{...},"modules":{...},"boot":[["file","hash"],...]} *) + let manifest_buf = Buffer.create 8192 in + Buffer.add_string manifest_buf "{\"v\":1,\"defs\":{"; + let first = ref true in + Hashtbl.iter (fun name hash -> + if not !first then Buffer.add_char manifest_buf ','; + first := false; + Buffer.add_string manifest_buf (Printf.sprintf "\"%s\":\"%s\"" + (escape_sx_string name) hash) + ) hidx.name_to_hash; + Buffer.add_string manifest_buf "},\"modules\":{"; + first := true; + Hashtbl.iter (fun fname hash -> + if not !first then Buffer.add_char manifest_buf ','; + first := false; + Buffer.add_string manifest_buf (Printf.sprintf "\"%s\":\"%s\"" + (escape_sx_string fname) hash) + ) module_hashes; + Buffer.add_string manifest_buf "},\"boot\":["; + first := true; + List.iter (fun (_fname, h) -> + if not !first then Buffer.add_char manifest_buf ','; + first := false; + Buffer.add_string manifest_buf (Printf.sprintf "\"%s\"" h) + ) boot_hashes; + Buffer.add_string manifest_buf "]}"; + let manifest_json = Buffer.contents manifest_buf in + ignore (env_bind env "__shell-component-manifest" (String manifest_json)); (* Build minimal pages-sx from defpage definitions in loaded .sx files. Scans all loaded .sx files in the component dirs for (defpage ...) forms. *) let pages_buf = Buffer.create 4096 in @@ -2750,16 +3106,23 @@ let http_setup_platform_constructors env = | [Env e] -> List (Hashtbl.fold (fun k _v acc -> String (Sx_types.unintern k) :: acc) e.bindings []) | _ -> List []) -let http_load_files env files = - (* Like cli_load_files but tolerant — logs errors, doesn't crash *) +let http_load_files ?(base_dir="") env files = + (* Like cli_load_files but tolerant — logs errors, doesn't crash. + When base_dir is set, unnamed definitions get path-derived names. *) List.iter (fun path -> if Sys.file_exists path then begin try + let prev_file = if Sx_types.env_has env "*current-file*" then Some (Sx_types.env_get env "*current-file*") else None in + ignore (Sx_types.env_bind env "*current-file*" (String path)); let exprs = Sx_parser.parse_file path in List.iter (fun expr -> - try ignore (eval_expr_io expr (Env env)) + let expr' = if base_dir <> "" then inject_path_name expr path base_dir else expr in + try ignore (eval_expr_io expr' (Env env)) with e -> Printf.eprintf "[http-load] %s: %s\n%!" (Filename.basename path) (Printexc.to_string e) - ) exprs + ) exprs; + (match prev_file with + | Some v -> ignore (Sx_types.env_bind env "*current-file*" v) + | None -> ()) with e -> Printf.eprintf "[http-load] parse error %s: %s\n%!" path (Printexc.to_string e) end ) files; @@ -3123,7 +3486,7 @@ let http_mode port = let skip_files = ["primitives.sx"; "types.sx"; "boundary.sx"; "harness.sx"; "eval-rules.sx"; "vm-inline.sx"] in let skip_dirs = ["tests"; "test"; "plans"; "essays"; "spec"; "client-libs"] in - let rec load_dir dir = + let rec load_dir ?(base="") dir = if Sys.file_exists dir && Sys.is_directory dir then begin let entries = Sys.readdir dir in Array.sort String.compare entries; @@ -3131,13 +3494,13 @@ let http_mode port = let path = dir ^ "/" ^ f in if Sys.is_directory path then begin if not (List.mem f skip_dirs) then - load_dir path + load_dir ~base path end else if Filename.check_suffix f ".sx" && not (List.mem f skip_files) && not (String.length f > 5 && String.sub f 0 5 = "test-") && not (Filename.check_suffix f ".test.sx") then - http_load_files env [path] + http_load_files ~base_dir:base env [path] ) entries end in @@ -3148,8 +3511,8 @@ let http_mode port = let docker_path = project_dir ^ "/sxc" in let dev_path = project_dir ^ "/sx/sxc" in if Sys.file_exists docker_path then docker_path else dev_path in - load_dir sx_sxc; - load_dir sx_sx; + load_dir ~base:sx_sxc sx_sxc; + load_dir ~base:sx_sx sx_sx; let t1 = Unix.gettimeofday () in Printf.eprintf "[sx-http] All files loaded in %.3fs\n%!" (t1 -. t0); (* Derive batchable_helpers from __io-registry *) @@ -3620,6 +3983,42 @@ let http_mode port = (escape_sx_string (Printexc.to_string e))) in write_response fd response; true + end else if is_sx && String.length path > 6 && String.sub path 0 6 = "/sx/h/" then begin + let rest = String.sub path 6 (String.length path - 6) in + (* WASM companion assets: /sx/h/sx_browser.bc.wasm.assets/... → /static/wasm/... *) + if String.length rest > 26 && String.sub rest 0 26 = "sx_browser.bc.wasm.assets/" then begin + let asset_path = "/static/wasm/" ^ rest in + write_response fd (serve_static_file static_dir asset_path); true + end else begin + (* Content-addressed definition endpoint: /sx/h/{hash} *) + let hash = rest in + let resp = match !_hash_index with + | Some idx -> + (match Hashtbl.find_opt idx.hash_to_def hash with + | Some def -> + let name = match Hashtbl.find_opt idx.hash_to_name hash with + | Some n -> n | None -> "?" in + (* Detect content type from filename *) + let is_js = Filename.check_suffix name ".js" in + let ct = if is_js then "application/javascript" + else "text/sx; charset=utf-8" in + let body = if is_js then def + else Printf.sprintf ";; %s\n%s" name def in + Printf.sprintf + "HTTP/1.1 200 OK\r\n\ + Content-Type: %s\r\n\ + Content-Length: %d\r\n\ + Cache-Control: public, max-age=31536000, immutable\r\n\ + Access-Control-Allow-Origin: *\r\n\ + Connection: keep-alive\r\n\r\n%s" + ct (String.length body) body + | None -> + http_response ~status:404 "unknown hash") + | None -> + http_response ~status:503 "hash index not built" + in + write_response fd resp; true + end (* inner begin for hash vs wasm-assets *) end else if is_sx then begin (* Streaming pages: chunked transfer, bypass cache. Convert SX URL to flat defpage path: @@ -3901,20 +4300,20 @@ let site_mode () = let skip_files = ["primitives.sx"; "types.sx"; "boundary.sx"; "harness.sx"; "eval-rules.sx"; "vm-inline.sx"] in let skip_dirs = ["tests"; "test"; "plans"; "essays"; "spec"; "client-libs"] in - let rec load_dir dir = + let rec load_dir ?(base="") dir = if Sys.file_exists dir && Sys.is_directory dir then begin let entries = Sys.readdir dir in Array.sort String.compare entries; Array.iter (fun f -> let path = dir ^ "/" ^ f in if Sys.is_directory path then begin - if not (List.mem f skip_dirs) then load_dir path + if not (List.mem f skip_dirs) then load_dir ~base path end else if Filename.check_suffix f ".sx" && not (List.mem f skip_files) && not (String.length f > 5 && String.sub f 0 5 = "test-") && not (Filename.check_suffix f ".test.sx") then - http_load_files env [path] + http_load_files ~base_dir:base env [path] ) entries end in @@ -3924,8 +4323,8 @@ let site_mode () = let docker_path = project_dir ^ "/sxc" in let dev_path = project_dir ^ "/sx/sxc" in if Sys.file_exists docker_path then docker_path else dev_path in - load_dir sx_sxc; - load_dir sx_sx; + load_dir ~base:sx_sxc sx_sxc; + load_dir ~base:sx_sx sx_sx; (* IO registry + app config *) (try match env_get env "__io-registry" with | Dict registry -> diff --git a/hosts/ocaml/browser/sx-platform.js b/hosts/ocaml/browser/sx-platform.js index a43e690a..f406a968 100644 --- a/hosts/ocaml/browser/sx-platform.js +++ b/hosts/ocaml/browser/sx-platform.js @@ -272,6 +272,11 @@ break; } } + // Content-addressed boot: script loaded from /sx/h/{hash}, not /static/wasm/. + // Fall back to /static/wasm/ base URL for module-manifest.json and .sx sources. + if (!_baseUrl || _baseUrl.indexOf("/sx/h/") !== -1) { + _baseUrl = "/static/wasm/"; + } } })(); @@ -350,19 +355,56 @@ /** * Try loading a pre-compiled .sxbc bytecode module (SX text format). * Uses K.loadModule which handles VM suspension (import requests). + * Content-addressed: checks localStorage by hash, fetches /sx/h/{hash} on miss. * Returns true on success, null on failure (caller falls back to .sx source). */ function loadBytecodeFile(path) { var sxbcPath = path.replace(/\.sx$/, '.sxbc'); - var url = _baseUrl + sxbcPath + _sxbcCacheBust; - try { - var xhr = new XMLHttpRequest(); - xhr.open("GET", url, false); - xhr.send(); - if (xhr.status !== 200) return null; + var sxbcFile = sxbcPath.split('/').pop(); // e.g. "dom.sxbc" + // Content-addressed resolution: manifest → localStorage → fetch by hash + var text = null; + var manifest = loadPageManifest(); + if (manifest && manifest.modules && manifest.modules[sxbcFile]) { + var hash = manifest.modules[sxbcFile]; + var lsKey = "sx:h:" + hash; + try { + text = localStorage.getItem(lsKey); + } catch(e) {} + if (!text) { + // Fetch by content hash + try { + var xhr2 = new XMLHttpRequest(); + xhr2.open("GET", "/sx/h/" + hash, false); + xhr2.send(); + if (xhr2.status === 200) { + text = xhr2.responseText; + // Strip comment line if present + if (text.charAt(0) === ';') { + var nl = text.indexOf('\n'); + if (nl >= 0) text = text.substring(nl + 1); + } + try { localStorage.setItem(lsKey, text); } catch(e) {} + } + } catch(e) {} + } + } + + // Fallback: fetch by URL (pre-content-addressed path) + if (!text) { + var url = _baseUrl + sxbcPath + _sxbcCacheBust; + try { + var xhr = new XMLHttpRequest(); + xhr.open("GET", url, false); + xhr.send(); + if (xhr.status !== 200) return null; + text = xhr.responseText; + } catch(e) { return null; } + } + + try { // Parse the sxbc text to get the SX tree - var parsed = K.parse(xhr.responseText); + var parsed = K.parse(text); if (!parsed || !parsed.length) return null; var sxbc = parsed[0]; // (sxbc version hash (code ...)) if (!sxbc || sxbc._type !== "list" || !sxbc.items) return null; @@ -626,10 +668,149 @@ return _symbolIndex; } + // ================================================================ + // Content-addressed definition loader + // + // The page manifest maps component names to content hashes. + // When a ~component symbol is missing, we resolve its hash, + // check localStorage, fetch from /sx/h/{hash} if needed, + // then load the definition (recursively resolving @h: deps). + // ================================================================ + + var _pageManifest = null; // { defs: { "~name": "hash", ... } } + var _hashToName = {}; // hash → "~name" + var _hashCache = {}; // hash → definition text (in-memory) + var _loadedHashes = {}; // hash → true (already K.load'd) + + function loadPageManifest() { + if (_pageManifest) return _pageManifest; + var el = document.querySelector('script[data-sx-manifest]'); + if (!el) return null; + try { + _pageManifest = JSON.parse(el.textContent); + var defs = _pageManifest.defs || {}; + for (var name in defs) { + _hashToName[defs[name]] = name; + } + return _pageManifest; + } catch(e) { + console.warn("[sx] Failed to parse manifest:", e); + return null; + } + } + + function resolveHash(hash) { + // 1. In-memory cache + if (_hashCache[hash]) return _hashCache[hash]; + // 2. localStorage + var key = "sx:h:" + hash; + try { + var cached = localStorage.getItem(key); + if (cached) { + _hashCache[hash] = cached; + return cached; + } + } catch(e) {} + // 3. Fetch from server + try { + var xhr = new XMLHttpRequest(); + xhr.open("GET", "/sx/h/" + hash, false); + xhr.send(); + if (xhr.status === 200) { + var def = xhr.responseText; + _hashCache[hash] = def; + try { localStorage.setItem(key, def); } catch(e) {} + return def; + } + } catch(e) { + console.warn("[sx] Failed to fetch hash " + hash + ":", e); + } + return null; + } + + function loadDefinitionByHash(hash) { + if (_loadedHashes[hash]) return true; + // Mark in-progress immediately to prevent circular recursion + _loadedHashes[hash] = "loading"; + var def = resolveHash(hash); + if (!def) { delete _loadedHashes[hash]; return false; } + + // Strip comment line (;; ~name\n) from start + var src = def; + if (src.charAt(0) === ';') { + var nl = src.indexOf('\n'); + if (nl >= 0) src = src.substring(nl + 1); + } + + // Find and recursively load @h: dependencies before loading this one + var hashRe = /@h:([0-9a-f]{16})/g; + var match; + while ((match = hashRe.exec(src)) !== null) { + var depHash = match[1]; + if (!_loadedHashes[depHash]) { + loadDefinitionByHash(depHash); + } + } + + // Rewrite @h:xxx back to ~names for the SX evaluator + var rewritten = src.replace(/@h:([0-9a-f]{16})/g, function(_m, h) { + return _hashToName[h] || ("@h:" + h); + }); + + // Eagerly pre-load any plain manifest symbols referenced in this definition. + // The CEK evaluator doesn't call __resolve-symbol, so deps must be present + // before the definition is called. Scan for word boundaries matching manifest keys. + if (_pageManifest && _pageManifest.defs) { + var words = rewritten.match(/[a-zA-Z_][a-zA-Z0-9_?!-]*/g) || []; + for (var wi = 0; wi < words.length; wi++) { + var w = words[wi]; + if (w !== name && _pageManifest.defs[w] && !_loadedHashes[_pageManifest.defs[w]]) { + loadDefinitionByHash(_pageManifest.defs[w]); + } + } + } + + // Prepend the component name back into the definition. + // Only for single-definition forms (defcomp/defisland/defmacro) where + // the name was stripped for hashing. Multi-define files (client libs) + // already contain named (define name ...) forms. + var name = _hashToName[hash]; + if (name) { + var isMultiDefine = /\(define\s+[a-zA-Z]/.test(rewritten); + if (!isMultiDefine) { + rewritten = rewritten.replace( + /^\((defcomp|defisland|defmacro|define)\s/, + function(_m, kw) { return "(" + kw + " " + name + " "; } + ); + } + } + + try { + K.load(rewritten); + _loadedHashes[hash] = true; + return true; + } catch(e) { + console.warn("[sx] Failed to load hash " + hash + " (" + (name || "?") + "):", e); + return false; + } + } + // Register the resolve hook — called by the VM when GLOBAL_GET fails K.registerNative("__resolve-symbol", function(args) { var name = args[0]; if (!name) return null; + + // Content-addressed resolution — components, libraries, macros + var manifest = loadPageManifest(); + if (manifest && manifest.defs && manifest.defs[name]) { + var hash = manifest.defs[name]; + if (!_loadedHashes[hash]) { + loadDefinitionByHash(hash); + return null; // VM re-lookups after hook + } + } + + // Library-level resolution (existing path — .sxbc modules) var idx = buildSymbolIndex(); if (!idx || !idx[name]) return null; var lib = idx[name]; diff --git a/shared/static/wasm/sx-platform.js b/shared/static/wasm/sx-platform.js index a43e690a..f406a968 100644 --- a/shared/static/wasm/sx-platform.js +++ b/shared/static/wasm/sx-platform.js @@ -272,6 +272,11 @@ break; } } + // Content-addressed boot: script loaded from /sx/h/{hash}, not /static/wasm/. + // Fall back to /static/wasm/ base URL for module-manifest.json and .sx sources. + if (!_baseUrl || _baseUrl.indexOf("/sx/h/") !== -1) { + _baseUrl = "/static/wasm/"; + } } })(); @@ -350,19 +355,56 @@ /** * Try loading a pre-compiled .sxbc bytecode module (SX text format). * Uses K.loadModule which handles VM suspension (import requests). + * Content-addressed: checks localStorage by hash, fetches /sx/h/{hash} on miss. * Returns true on success, null on failure (caller falls back to .sx source). */ function loadBytecodeFile(path) { var sxbcPath = path.replace(/\.sx$/, '.sxbc'); - var url = _baseUrl + sxbcPath + _sxbcCacheBust; - try { - var xhr = new XMLHttpRequest(); - xhr.open("GET", url, false); - xhr.send(); - if (xhr.status !== 200) return null; + var sxbcFile = sxbcPath.split('/').pop(); // e.g. "dom.sxbc" + // Content-addressed resolution: manifest → localStorage → fetch by hash + var text = null; + var manifest = loadPageManifest(); + if (manifest && manifest.modules && manifest.modules[sxbcFile]) { + var hash = manifest.modules[sxbcFile]; + var lsKey = "sx:h:" + hash; + try { + text = localStorage.getItem(lsKey); + } catch(e) {} + if (!text) { + // Fetch by content hash + try { + var xhr2 = new XMLHttpRequest(); + xhr2.open("GET", "/sx/h/" + hash, false); + xhr2.send(); + if (xhr2.status === 200) { + text = xhr2.responseText; + // Strip comment line if present + if (text.charAt(0) === ';') { + var nl = text.indexOf('\n'); + if (nl >= 0) text = text.substring(nl + 1); + } + try { localStorage.setItem(lsKey, text); } catch(e) {} + } + } catch(e) {} + } + } + + // Fallback: fetch by URL (pre-content-addressed path) + if (!text) { + var url = _baseUrl + sxbcPath + _sxbcCacheBust; + try { + var xhr = new XMLHttpRequest(); + xhr.open("GET", url, false); + xhr.send(); + if (xhr.status !== 200) return null; + text = xhr.responseText; + } catch(e) { return null; } + } + + try { // Parse the sxbc text to get the SX tree - var parsed = K.parse(xhr.responseText); + var parsed = K.parse(text); if (!parsed || !parsed.length) return null; var sxbc = parsed[0]; // (sxbc version hash (code ...)) if (!sxbc || sxbc._type !== "list" || !sxbc.items) return null; @@ -626,10 +668,149 @@ return _symbolIndex; } + // ================================================================ + // Content-addressed definition loader + // + // The page manifest maps component names to content hashes. + // When a ~component symbol is missing, we resolve its hash, + // check localStorage, fetch from /sx/h/{hash} if needed, + // then load the definition (recursively resolving @h: deps). + // ================================================================ + + var _pageManifest = null; // { defs: { "~name": "hash", ... } } + var _hashToName = {}; // hash → "~name" + var _hashCache = {}; // hash → definition text (in-memory) + var _loadedHashes = {}; // hash → true (already K.load'd) + + function loadPageManifest() { + if (_pageManifest) return _pageManifest; + var el = document.querySelector('script[data-sx-manifest]'); + if (!el) return null; + try { + _pageManifest = JSON.parse(el.textContent); + var defs = _pageManifest.defs || {}; + for (var name in defs) { + _hashToName[defs[name]] = name; + } + return _pageManifest; + } catch(e) { + console.warn("[sx] Failed to parse manifest:", e); + return null; + } + } + + function resolveHash(hash) { + // 1. In-memory cache + if (_hashCache[hash]) return _hashCache[hash]; + // 2. localStorage + var key = "sx:h:" + hash; + try { + var cached = localStorage.getItem(key); + if (cached) { + _hashCache[hash] = cached; + return cached; + } + } catch(e) {} + // 3. Fetch from server + try { + var xhr = new XMLHttpRequest(); + xhr.open("GET", "/sx/h/" + hash, false); + xhr.send(); + if (xhr.status === 200) { + var def = xhr.responseText; + _hashCache[hash] = def; + try { localStorage.setItem(key, def); } catch(e) {} + return def; + } + } catch(e) { + console.warn("[sx] Failed to fetch hash " + hash + ":", e); + } + return null; + } + + function loadDefinitionByHash(hash) { + if (_loadedHashes[hash]) return true; + // Mark in-progress immediately to prevent circular recursion + _loadedHashes[hash] = "loading"; + var def = resolveHash(hash); + if (!def) { delete _loadedHashes[hash]; return false; } + + // Strip comment line (;; ~name\n) from start + var src = def; + if (src.charAt(0) === ';') { + var nl = src.indexOf('\n'); + if (nl >= 0) src = src.substring(nl + 1); + } + + // Find and recursively load @h: dependencies before loading this one + var hashRe = /@h:([0-9a-f]{16})/g; + var match; + while ((match = hashRe.exec(src)) !== null) { + var depHash = match[1]; + if (!_loadedHashes[depHash]) { + loadDefinitionByHash(depHash); + } + } + + // Rewrite @h:xxx back to ~names for the SX evaluator + var rewritten = src.replace(/@h:([0-9a-f]{16})/g, function(_m, h) { + return _hashToName[h] || ("@h:" + h); + }); + + // Eagerly pre-load any plain manifest symbols referenced in this definition. + // The CEK evaluator doesn't call __resolve-symbol, so deps must be present + // before the definition is called. Scan for word boundaries matching manifest keys. + if (_pageManifest && _pageManifest.defs) { + var words = rewritten.match(/[a-zA-Z_][a-zA-Z0-9_?!-]*/g) || []; + for (var wi = 0; wi < words.length; wi++) { + var w = words[wi]; + if (w !== name && _pageManifest.defs[w] && !_loadedHashes[_pageManifest.defs[w]]) { + loadDefinitionByHash(_pageManifest.defs[w]); + } + } + } + + // Prepend the component name back into the definition. + // Only for single-definition forms (defcomp/defisland/defmacro) where + // the name was stripped for hashing. Multi-define files (client libs) + // already contain named (define name ...) forms. + var name = _hashToName[hash]; + if (name) { + var isMultiDefine = /\(define\s+[a-zA-Z]/.test(rewritten); + if (!isMultiDefine) { + rewritten = rewritten.replace( + /^\((defcomp|defisland|defmacro|define)\s/, + function(_m, kw) { return "(" + kw + " " + name + " "; } + ); + } + } + + try { + K.load(rewritten); + _loadedHashes[hash] = true; + return true; + } catch(e) { + console.warn("[sx] Failed to load hash " + hash + " (" + (name || "?") + "):", e); + return false; + } + } + // Register the resolve hook — called by the VM when GLOBAL_GET fails K.registerNative("__resolve-symbol", function(args) { var name = args[0]; if (!name) return null; + + // Content-addressed resolution — components, libraries, macros + var manifest = loadPageManifest(); + if (manifest && manifest.defs && manifest.defs[name]) { + var hash = manifest.defs[name]; + if (!_loadedHashes[hash]) { + loadDefinitionByHash(hash); + return null; // VM re-lookups after hook + } + } + + // Library-level resolution (existing path — .sxbc modules) var idx = buildSymbolIndex(); if (!idx || !idx[name]) return null; var lib = idx[name]; diff --git a/shared/sx/templates/shell.sx b/shared/sx/templates/shell.sx index 2c7dcbd8..264b2a6a 100644 --- a/shared/sx/templates/shell.sx +++ b/shared/sx/templates/shell.sx @@ -7,6 +7,7 @@ (sx-css :as string?) (component-hash :as string?) (component-defs :as string?) + (component-manifest :as string?) (pages-sx :as string?) (page-sx :as string?) (body-html :as string?) @@ -61,11 +62,12 @@ (style (raw! "[data-sx-island] button,[data-sx-island] a,[data-sx-island] [role=button]{cursor:pointer}")) - (script - :type "text/sx" - :data-components true - :data-hash component-hash - (raw! (or component-defs ""))) + (when + component-manifest + (script + :type "application/json" + :data-sx-manifest true + (raw! component-manifest))) (when init-sx (script :type "text/sx" :data-init true (raw! init-sx))) @@ -74,12 +76,6 @@ :type "text/sx" :data-mount "#sx-root" (raw! (or page-sx ""))) - (<> - (script - :src (str - asset-url - "/wasm/sx_browser.bc.wasm.js?v=" - (or wasm-hash "0"))) - (script - :src (str asset-url "/wasm/sx-platform.js?v=" (or platform-hash "0")) - :data-sxbc-hash (or sxbc-hash "0"))))))) + (script + (raw! + "\n(function(){\n var m=document.querySelector('[data-sx-manifest]');\n if(!m)return;\n var j=JSON.parse(m.textContent);\n\n // Cache API wrapper — intercept .wasm fetches for offline caching.\n if(typeof caches!=='undefined'){\n var _fetch=window.fetch;\n var CACHE='sx-wasm-v1';\n window.fetch=function(input,init){\n var url=(typeof input==='string')?input:\n (input instanceof URL)?input.href:\n (input&&input.url)||'';\n if(url.indexOf('.wasm')!==-1){\n return caches.open(CACHE).then(function(c){\n return c.match(url).then(function(r){\n if(r)return r;\n return _fetch(input,init).then(function(resp){\n if(resp.ok)c.put(url,resp.clone());\n return resp;\n });\n });\n });\n }\n return _fetch(input,init);\n };\n }\n\n // Content-addressed boot: load kernel + platform by hash\n if(!j.boot)return;\n j.boot.forEach(function(h){\n var s=document.createElement('script');\n s.src='/sx/h/'+h;\n document.head.appendChild(s);\n });\n})();\n"))))))