Non-blocking batch IO for OCaml kernel + stable component hash

OCaml kernel (sx_server.ml): - Batch IO mode for aser-slot: batchable helpers (highlight, component-source) return placeholders during evaluation instead of blocking on stdin. After aser completes, all batched requests are flushed to Python at once. - Python processes them concurrently with asyncio.gather. - Placeholders (using «IO:N» markers) are replaced with actual values in the result string. - Non-batchable IO (query, action, ctx, request-arg) still uses blocking mode — their results drive control flow. Python bridge (ocaml_bridge.py): - _read_until_ok handles batched protocol: collects io-request lines with numeric IDs, processes on (io-done N) with gather. - IO result cache for pure helpers — eliminates redundant calls. - _handle_io_request strips batch ID from request format. Component caching (jinja_bridge.py): - Hash computed from FULL component env (all names + bodies), not per-page subset. Stable across all pages — browser caches once, no re-download on navigation between pages. - invalidate_component_hash() called on hot-reload. Tests: 15/15 OCaml helper tests pass (2 new batch IO tests). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 16:53:01 +00:00
parent d3b3b4b720
commit 96e7bbbac1
4 changed files with 423 additions and 58 deletions
--- a/shared/sx/ocaml_bridge.py
+++ b/shared/sx/ocaml_bridge.py
@@ -44,6 +44,7 @@ class OcamlBridge:
        self._started = False
        self._components_loaded = False
        self._helpers_injected = False
+        self._io_cache: dict[tuple, Any] = {}  # (name, repr(arg)...) → cached result

    async def start(self) -> None:
        """Launch the OCaml subprocess and wait for (ready)."""
@@ -336,22 +337,48 @@ class OcamlBridge:
    ) -> str:
        """Read lines until (ok ...) or (error ...).

-        Handles (io-request ...) by fulfilling IO and sending (io-response ...).
-        ALWAYS sends a response to keep the pipe clean, even on error.
+        Handles IO requests in two modes:
+        - Blocking (no numeric ID): one io-request → immediate io-response
+        - Batched (numeric ID): collect io-requests until (io-done N), run
+          them with asyncio.gather, send responses in request order
        """
+        import asyncio
+        pending_batch: list[str] = []
+
        while True:
            line = await self._readline()

            if line.startswith("(io-request "):
+                # Check if batched (has numeric ID after "io-request ")
+                after = line[len("(io-request "):].lstrip()
+                if after and after[0].isdigit():
+                    # Batched mode — collect, don't respond yet
+                    pending_batch.append(line)
+                    continue
+                # Legacy blocking mode — respond immediately
                try:
                    result = await self._handle_io_request(line, ctx)
                    await self._send(f"(io-response {_serialize_for_ocaml(result)})")
                except Exception as e:
-                    # MUST send a response or the pipe desyncs
                    _logger.warning("IO request failed, sending nil: %s", e)
                    await self._send("(io-response nil)")
                continue

+            if line.startswith("(io-done "):
+                # Batch complete — process all pending IO concurrently
+                tasks = [self._handle_io_request(req, ctx)
+                         for req in pending_batch]
+                results = await asyncio.gather(*tasks, return_exceptions=True)
+                for result in results:
+                    if isinstance(result, BaseException):
+                        _logger.warning("Batched IO failed: %s", result)
+                        await self._send("(io-response nil)")
+                    else:
+                        await self._send(
+                            f"(io-response {_serialize_for_ocaml(result)})")
+                pending_batch = []
+                continue
+
            kind, value = _parse_response(line)
            if kind == "error":
                raise OcamlBridgeError(value or "Unknown error")
@@ -372,12 +399,17 @@ class OcamlBridge:
            raise OcamlBridgeError(f"Malformed io-request: {line}")

        parts = parsed[0]
-        # parts = [Symbol("io-request"), name_str, ...args]
+        # Legacy: [Symbol("io-request"), name_str, ...args]
+        # Batched: [Symbol("io-request"), id_num, name_str, ...args]
        if len(parts) < 2:
            raise OcamlBridgeError(f"Malformed io-request: {line}")

-        req_name = _to_str(parts[1])
-        args = parts[2:]
+        # Skip numeric batch ID if present
+        offset = 1
+        if isinstance(parts[1], (int, float)):
+            offset = 2
+        req_name = _to_str(parts[offset])
+        args = parts[offset + 1:]

        if req_name == "query":
            return await self._io_query(args)
@@ -442,12 +474,23 @@ class OcamlBridge:
        key = _to_str(args[0]) if args else ""
        return ctx.get(key)

+    # Helpers assumed to be pure functions — results are cached by args.
+    _CACHEABLE_HELPERS = frozenset({
+        "highlight", "component-source", "primitives-data",
+        "special-forms-data", "reference-data", "read-spec-file",
+        "bootstrapper-data", "bundle-analyzer-data", "routing-analyzer-data",
+    })
+
    async def _io_helper(self, args: list, ctx: dict[str, Any] | None) -> Any:
        """Handle (io-request "helper" name arg1 arg2 ...).

        Dispatches to registered page helpers — Python functions like
        read-spec-file, bootstrapper-data, etc.  The helper service name
        is passed via ctx["_helper_service"].
+
+        Pure helpers (see _CACHEABLE_HELPERS) are cached, keyed on the
+        helper name plus repr() of each argument.  A repeat call skips
+        re-running the helper; the io-request itself is still received.
        """
        import asyncio
        from .pages import get_page_helpers
@@ -456,6 +499,12 @@ class OcamlBridge:
        name = _to_str(args[0]) if args else ""
        helper_args = [_to_python(a) for a in args[1:]]

+        # Cache lookup for pure helpers
+        if name in self._CACHEABLE_HELPERS:
+            cache_key = (name, *[repr(a) for a in helper_args])
+            if cache_key in self._io_cache:
+                return self._io_cache[cache_key]
+
        # Check page helpers first (application-level)
        service = (ctx or {}).get("_helper_service", "sx")
        helpers = get_page_helpers(service)
@@ -464,6 +513,9 @@ class OcamlBridge:
            result = fn(*helper_args)
            if asyncio.iscoroutine(result):
                result = await result
+            # Cache pure helper results
+            if name in self._CACHEABLE_HELPERS:
+                self._io_cache[cache_key] = result
            return result

        # Fall back to IO primitives (now, state-get, state-set!, etc.)