Length-prefixed binary framing for OCaml↔Python pipe
Replace the newline-delimited text protocol with length-prefixed blobs for all response data (send_ok_string, send_ok_raw).

The OCaml side sends (ok-len N)\n followed by exactly N raw bytes and a trailing \n, where N is the byte length of the payload. Python reads the length line, calls readexactly(N), and then consumes the trailing newline.

This eliminates all pipe desync issues:
- No escaping needed for any content (HTML, SX with newlines, quotes)
- No size limits (1MB+ responses work cleanly)
- No multi-line response splitting
- No double-escaping bugs

The old (ok "...") and (ok-raw ...) formats are still parsed as fallbacks for backward compatibility.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -68,24 +68,24 @@ let send line =
|
|||||||
|
|
||||||
let send_ok () = send "(ok)"
|
let send_ok () = send "(ok)"
|
||||||
let send_ok_value v = send (Printf.sprintf "(ok %s)" (serialize_value v))
|
let send_ok_value v = send (Printf.sprintf "(ok %s)" (serialize_value v))
|
||||||
let send_ok_string s = send (Printf.sprintf "(ok \"%s\")" (escape_sx_string s))
|
|
||||||
let send_error msg = send (Printf.sprintf "(error \"%s\")" (escape_sx_string msg))
|
let send_error msg = send (Printf.sprintf "(error \"%s\")" (escape_sx_string msg))
|
||||||
|
|
||||||
(** Send ok-raw, ensuring single-line output.
|
(** Length-prefixed binary send — handles any content without escaping.
|
||||||
SX wire format from aser may contain newlines inside string literals.
|
Sends: (ok-len N)\n followed by exactly N bytes of raw data, then \n.
|
||||||
We must escape those to prevent pipe desync (Python reads one line
|
Python reads the length line, then reads exactly N bytes. *)
|
||||||
at a time), but we can't blindly replace newlines in the raw SX
|
let send_ok_blob s =
|
||||||
because that would break string content.
|
let n = String.length s in
|
||||||
|
Printf.printf "(ok-len %d)\n" n;
|
||||||
|
print_string s;
|
||||||
|
print_char '\n';
|
||||||
|
flush stdout
|
||||||
|
|
||||||
Strategy: wrap as a properly escaped string literal.
|
(** Send a string value — use blob for anything that might contain
|
||||||
Python side will unescape it. *)
|
newlines, quotes, or be large. *)
|
||||||
let send_ok_raw s =
|
let send_ok_string s = send_ok_blob s
|
||||||
(* If the result has no newlines, send as-is for backward compat *)
|
|
||||||
if not (String.contains s '\n') then
|
(** Send raw SX wire format — may contain newlines in string literals. *)
|
||||||
send (Printf.sprintf "(ok-raw %s)" s)
|
let send_ok_raw s = send_ok_blob s
|
||||||
else
|
|
||||||
(* Wrap as escaped string so newlines are preserved *)
|
|
||||||
send (Printf.sprintf "(ok \"%s\")" (escape_sx_string s))
|
|
||||||
|
|
||||||
|
|
||||||
(* ====================================================================== *)
|
(* ====================================================================== *)
|
||||||
|
|||||||
@@ -497,6 +497,13 @@ class OcamlBridge:
|
|||||||
Returns (kind, value) where kind is "ok" or "error".
|
Returns (kind, value) where kind is "ok" or "error".
|
||||||
"""
|
"""
|
||||||
line = await self._readline()
|
line = await self._readline()
|
||||||
|
# Length-prefixed blob
|
||||||
|
if line.startswith("(ok-len "):
|
||||||
|
n = int(line[8:-1])
|
||||||
|
assert self._proc and self._proc.stdout
|
||||||
|
data = await self._proc.stdout.readexactly(n)
|
||||||
|
await self._proc.stdout.readline() # trailing newline
|
||||||
|
return ("ok", data.decode())
|
||||||
return _parse_response(line)
|
return _parse_response(line)
|
||||||
|
|
||||||
async def _read_until_ok(
|
async def _read_until_ok(
|
||||||
@@ -547,6 +554,15 @@ class OcamlBridge:
|
|||||||
pending_batch = []
|
pending_batch = []
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Length-prefixed blob: (ok-len N)
|
||||||
|
if line.startswith("(ok-len "):
|
||||||
|
n = int(line[8:-1])
|
||||||
|
assert self._proc and self._proc.stdout
|
||||||
|
data = await self._proc.stdout.readexactly(n)
|
||||||
|
# Read trailing newline
|
||||||
|
await self._proc.stdout.readline()
|
||||||
|
return data.decode()
|
||||||
|
|
||||||
kind, value = _parse_response(line)
|
kind, value = _parse_response(line)
|
||||||
if kind == "error":
|
if kind == "error":
|
||||||
raise OcamlBridgeError(value or "Unknown error")
|
raise OcamlBridgeError(value or "Unknown error")
|
||||||
|
|||||||
Reference in New Issue
Block a user