Length-prefixed binary framing for OCaml↔Python pipe

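Replace the newline-delimited text protocol with length-prefixed blobs
for string and raw response data (send_ok_string, send_ok_raw). The OCaml
side sends (ok-len N)\n followed by exactly N raw bytes and a trailing \n,
where N is the byte length of the payload (not its character count).
Python reads the length line, then readexactly(N), then consumes the
trailing newline. send_ok_value and send_error keep the single-line format.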

This eliminates the pipe desync issues for blob-framed responses:
- No escaping needed for any content (HTML, SX with newlines, quotes)
- No size limits (1MB+ responses work cleanly)
- No multi-line response splitting
- No double-escaping bugs

The old (ok "...") and (ok-raw ...) formats are still parsed as
fallbacks for backward compatibility.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-20 12:48:52 +00:00
parent 373a4f0134
commit 6d73edf297
2 changed files with 31 additions and 15 deletions

View File

@@ -68,24 +68,24 @@ let send line =
(** Emit the bare success acknowledgement line [(ok)] via [send]. *)
let send_ok () =
  send "(ok)"
(** Emit a success line carrying [v], rendered with [serialize_value] and
    wrapped as [(ok ...)]. *)
let send_ok_value v =
  let rendered = serialize_value v in
  send ("(ok " ^ rendered ^ ")")
(** Emit a success line carrying [s] as a quoted string literal, after
    passing it through [escape_sx_string]. *)
let send_ok_string s =
  let escaped = escape_sx_string s in
  send ("(ok \"" ^ escaped ^ "\")")
(** Emit an error line carrying [msg] as a quoted string literal, after
    passing it through [escape_sx_string]. *)
let send_error msg =
  let escaped = escape_sx_string msg in
  send ("(error \"" ^ escaped ^ "\")")
(** Send ok-raw, ensuring single-line output.
SX wire format from aser may contain newlines inside string literals.
We must escape those to prevent pipe desync (Python reads one line
at a time), but we can't blindly replace newlines in the raw SX
because that would break string content.
(** Length-prefixed send: transmits arbitrary content with no escaping.
    Wire format on stdout: the header line [(ok-len N)], then exactly N
    bytes of payload, then a single newline. N is [String.length s], so it
    is a byte count. The reader is expected to parse the header line and
    then read exactly N bytes. stdout is flushed once the whole frame has
    been written. *)
let send_ok_blob s =
  (* One format string covers header, payload and trailer; [%!] flushes. *)
  Printf.printf "(ok-len %d)\n%s\n%!" (String.length s) s
Strategy: wrap as a properly escaped string literal.
Python side will unescape it. *)
let send_ok_raw s =
  let line =
    if String.contains s '\n' then
      (* Multi-line payload: ship it as an escaped string literal so the
         newlines survive the line-oriented pipe. *)
      Printf.sprintf "(ok \"%s\")" (escape_sx_string s)
    else
      (* Single-line payload: keep the plain ok-raw form for backward
         compatibility. *)
      Printf.sprintf "(ok-raw %s)" s
  in
  send line
(** Reply with a string result. Delegates to [send_ok_blob], so content
    that contains newlines or quotes, or that is large, needs no
    escaping. *)
let send_ok_string payload =
  send_ok_blob payload
(** Reply with raw SX wire-format text, which may contain newlines inside
    string literals. Delegates to [send_ok_blob]. *)
let send_ok_raw wire =
  send_ok_blob wire
(* ====================================================================== *)

View File

@@ -497,6 +497,13 @@ class OcamlBridge:
Returns (kind, value) where kind is "ok" or "error".
"""
line = await self._readline()
# Length-prefixed blob
if line.startswith("(ok-len "):
n = int(line[8:-1])
assert self._proc and self._proc.stdout
data = await self._proc.stdout.readexactly(n)
await self._proc.stdout.readline() # trailing newline
return ("ok", data.decode())
return _parse_response(line)
async def _read_until_ok(
@@ -547,6 +554,15 @@ class OcamlBridge:
pending_batch = []
continue
# Length-prefixed blob: (ok-len N)
if line.startswith("(ok-len "):
n = int(line[8:-1])
assert self._proc and self._proc.stdout
data = await self._proc.stdout.readexactly(n)
# Read trailing newline
await self._proc.stdout.readline()
return data.decode()
kind, value = _parse_response(line)
if kind == "error":
raise OcamlBridgeError(value or "Unknown error")