From 6d73edf297d7e92fc38d1c93e5925925f2631928 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 20 Mar 2026 12:48:52 +0000 Subject: [PATCH] =?UTF-8?q?Length-prefixed=20binary=20framing=20for=20OCam?= =?UTF-8?q?l=E2=86=94Python=20pipe?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace newline-delimited text protocol with length-prefixed blobs for all response data (send_ok_string, send_ok_raw). The OCaml side sends (ok-len N)\n followed by exactly N raw bytes + \n. Python reads the length, then readexactly(N). This eliminates all pipe desync issues: - No escaping needed for any content (HTML, SX with newlines, quotes) - No size limits (1MB+ responses work cleanly) - No multi-line response splitting - No double-escaping bugs The old (ok "...") and (ok-raw ...) formats are still parsed as fallbacks for backward compatibility. Co-Authored-By: Claude Opus 4.6 (1M context) --- hosts/ocaml/bin/sx_server.ml | 30 +++++++++++++++--------------- shared/sx/ocaml_bridge.py | 16 ++++++++++++++++ 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/hosts/ocaml/bin/sx_server.ml b/hosts/ocaml/bin/sx_server.ml index 0a17aba..cbbf350 100644 --- a/hosts/ocaml/bin/sx_server.ml +++ b/hosts/ocaml/bin/sx_server.ml @@ -68,24 +68,24 @@ let send line = let send_ok () = send "(ok)" let send_ok_value v = send (Printf.sprintf "(ok %s)" (serialize_value v)) -let send_ok_string s = send (Printf.sprintf "(ok \"%s\")" (escape_sx_string s)) let send_error msg = send (Printf.sprintf "(error \"%s\")" (escape_sx_string msg)) -(** Send ok-raw, ensuring single-line output. - SX wire format from aser may contain newlines inside string literals. - We must escape those to prevent pipe desync (Python reads one line - at a time), but we can't blindly replace newlines in the raw SX - because that would break string content. +(** Length-prefixed binary send — handles any content without escaping. + Sends: (ok-len N)\n followed by exactly N bytes of raw data, then \n. + Python reads the length line, then reads exactly N bytes. *) +let send_ok_blob s = + let n = String.length s in + Printf.printf "(ok-len %d)\n" n; + print_string s; + print_char '\n'; + flush stdout - Strategy: wrap as a properly escaped string literal. - Python side will unescape it. *) -let send_ok_raw s = - (* If the result has no newlines, send as-is for backward compat *) - if not (String.contains s '\n') then - send (Printf.sprintf "(ok-raw %s)" s) - else - (* Wrap as escaped string so newlines are preserved *) - send (Printf.sprintf "(ok \"%s\")" (escape_sx_string s)) +(** Send a string value — use blob for anything that might contain + newlines, quotes, or be large. *) +let send_ok_string s = send_ok_blob s + +(** Send raw SX wire format — may contain newlines in string literals. *) +let send_ok_raw s = send_ok_blob s (* ====================================================================== *) diff --git a/shared/sx/ocaml_bridge.py b/shared/sx/ocaml_bridge.py index cbedeaa..5ebdfc7 100644 --- a/shared/sx/ocaml_bridge.py +++ b/shared/sx/ocaml_bridge.py @@ -497,6 +497,13 @@ class OcamlBridge: Returns (kind, value) where kind is "ok" or "error". """ line = await self._readline() + # Length-prefixed blob + if line.startswith("(ok-len "): + n = int(line[8:-1]) + assert self._proc and self._proc.stdout + data = await self._proc.stdout.readexactly(n) + await self._proc.stdout.readline() # trailing newline + return ("ok", data.decode()) return _parse_response(line) async def _read_until_ok( @@ -547,6 +554,15 @@ class OcamlBridge: pending_batch = [] continue + # Length-prefixed blob: (ok-len N) + if line.startswith("(ok-len "): + n = int(line[8:-1]) + assert self._proc and self._proc.stdout + data = await self._proc.stdout.readexactly(n) + # Read trailing newline + await self._proc.stdout.readline() + return data.decode() + kind, value = _parse_response(line) if kind == "error": raise OcamlBridgeError(value or "Unknown error")