host-persist: content-addressed blob adapter — Blocker CLOSED
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 40s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 40s
blob/put|get|has? backed by <root>/blobs/<cid>, CIDv1 (raw codec,
sha2-256 via Sx_cid/Sx_sha2). put idempotent; persist stores only the
{:cid :size :mime} ref. persist_durable_test.sh extended (8/8): blob
round-trip + content-address idempotency + bytes/ref surviving real
restart. Mock blob suite 14/0 on worktree binary. Durable-storage
Blocker now CLOSED.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -45,6 +45,7 @@ let rec ensure_dir dir =
|
||||
|
||||
let streams_dir () = Filename.concat (root_dir ()) "streams"
|
||||
let kv_dir () = Filename.concat (root_dir ()) "kv"
|
||||
let blobs_dir () = Filename.concat (root_dir ()) "blobs"
|
||||
|
||||
let read_file path =
|
||||
let ic = open_in_bin path in
|
||||
@@ -227,6 +228,32 @@ let do_kv_keys () =
|
||||
|> List.sort String.compare
|
||||
|> List.map (fun s -> String s))
|
||||
|
||||
(* ---- blob store (content-addressed) ------------------------------------ *)
|
||||
(* Same pattern as the persist ops, but a SEPARATE adapter: large objects live
|
||||
in a content-addressed directory keyed by a CIDv1 (raw codec, sha2-256).
|
||||
persist only ever stores the returned ref ({:cid :size :mime}), never bytes.
|
||||
blob/put is idempotent — identical bytes hash to the same cid + same file. *)
|
||||
|
||||
let codec_raw = 0x55
|
||||
|
||||
let blob_cid bytes =
|
||||
let digest = Sx_cid.unhex (Sx_sha2.sha256_hex bytes) in
|
||||
Sx_cid.cidv1 codec_raw (Sx_cid.multihash Sx_cid.mh_sha2_256 digest)
|
||||
|
||||
let blob_path cid = Filename.concat (blobs_dir ()) cid
|
||||
|
||||
let do_blob_put bytes =
|
||||
let cid = blob_cid bytes in
|
||||
let p = blob_path cid in
|
||||
if not (Sys.file_exists p) then write_file_atomic p bytes;
|
||||
String cid
|
||||
|
||||
let do_blob_get cid =
|
||||
let p = blob_path cid in
|
||||
if Sys.file_exists p then String (read_file p) else Nil
|
||||
|
||||
let do_blob_has cid = Bool (Sys.file_exists (blob_path cid))
|
||||
|
||||
(* ---- dispatch ---------------------------------------------------------- *)
|
||||
|
||||
let arglist = function
|
||||
@@ -257,4 +284,10 @@ let handle_op op args =
|
||||
| "persist/kv-has?" ->
|
||||
(match a with key :: _ -> Some (do_kv_has (str key)) | _ -> Some (Bool false))
|
||||
| "persist/kv-keys" -> Some (do_kv_keys ())
|
||||
| "blob/put" ->
|
||||
(match a with bytes :: _ -> Some (do_blob_put (str bytes)) | _ -> Some Nil)
|
||||
| "blob/get" ->
|
||||
(match a with cid :: _ -> Some (do_blob_get (str cid)) | _ -> Some Nil)
|
||||
| "blob/has?" ->
|
||||
(match a with cid :: _ -> Some (do_blob_has (str cid)) | _ -> Some (Bool false))
|
||||
| _ -> None
|
||||
|
||||
@@ -46,6 +46,7 @@ PRELUDE='(epoch 1)
|
||||
(load "lib/persist/log.sx")
|
||||
(load "lib/persist/kv.sx")
|
||||
(load "lib/persist/durable.sx")
|
||||
(load "lib/persist/blob.sx")
|
||||
(epoch 2)'
|
||||
|
||||
# run_eval <sx-expr-string>: prints the final (ok-len 2 ...) payload line.
|
||||
@@ -113,6 +114,31 @@ GOT=$(run_eval "$(q '(let ((b (persist/durable-backend)))
|
||||
(persist/kv-get b "survive")))')")
|
||||
check "recovered after restart" "$GOT" '(2 2 2 "yes")'
|
||||
|
||||
echo "== blob: content-addressed put/get/has? round-trip =="
|
||||
GOT=$(run_eval "$(q '(let ((bs (persist/blob-store-backend)))
|
||||
(let ((r (persist/blob-store bs "hello world" "text/plain")))
|
||||
(list (persist/blob-size r)
|
||||
(persist/blob-mime r)
|
||||
(persist/blob-fetch bs r)
|
||||
(persist/blob-exists? bs r))))')")
|
||||
check "blob size/mime/fetch/exists" "$GOT" '(11 "text/plain" "hello world" true)'
|
||||
|
||||
echo "== blob: put is content-addressed (idempotent cid) =="
|
||||
GOT=$(run_eval "$(q '(let ((bs (persist/blob-store-backend)))
|
||||
(equal? (persist/blob-cid (persist/blob-store bs "same bytes" "x"))
|
||||
(persist/blob-cid (persist/blob-store bs "same bytes" "x"))))')")
|
||||
check "same bytes -> same cid" "$GOT" "true"
|
||||
|
||||
echo "== blob: bytes + ref-in-kv survive a REAL restart =="
|
||||
# process A: store a blob, keep only its ref in the durable kv.
|
||||
run_eval "$(q '(let ((b (persist/durable-backend)) (bs (persist/blob-store-backend)))
|
||||
(begin (persist/kv-put b "logo" (persist/blob-store bs "PNGDATA" "image/png")) nil))')" >/dev/null
|
||||
# fresh process: read the ref from kv, fetch the bytes from the blob store.
|
||||
GOT=$(run_eval "$(q '(let ((b (persist/durable-backend)) (bs (persist/blob-store-backend)))
|
||||
(let ((r (persist/kv-get b "logo")))
|
||||
(list (persist/blob-fetch bs r) (persist/blob-exists? bs r) (persist/blob-mime r))))')")
|
||||
check "blob recovered via ref after restart" "$GOT" '("PNGDATA" true "image/png")'
|
||||
|
||||
echo
|
||||
echo "durable adapter: $PASS passed, $FAIL failed"
|
||||
[ "$FAIL" -eq 0 ]
|
||||
|
||||
@@ -197,6 +197,16 @@ durable backend. Other subsystem loops copy this pattern; it does not touch the
|
||||
real `lib/acl`.
|
||||
|
||||
## Progress log
|
||||
- **Host durable-storage adapter — blob adapter LIVE, Blocker CLOSED (8/8).**
|
||||
Added content-addressed `blob/put|get|has?` to `sx_persist_store.ml`: bytes
|
||||
land in `<root>/blobs/<cid>` keyed by a CIDv1 (raw codec, sha2-256 via
|
||||
`Sx_cid`/`Sx_sha2`); `put` is idempotent (identical bytes → same cid → same
|
||||
file); persist stores only the `{:cid :size :mime}` ref, never the bytes. The
|
||||
`(eval ...)`/bridge/resolver fall-through already routes any unowned op to the
|
||||
store, so no new wiring was needed. `persist_durable_test.sh` extended:
|
||||
blob round-trip, content-address idempotency, and bytes+ref-in-kv surviving a
|
||||
real process restart — all green. Existing mock blob suite 14/0 against the
|
||||
worktree binary. The host durable-storage Blocker is now CLOSED.
|
||||
- **Host durable-storage adapter — durable+recovery LIVE (5/5 acceptance).**
|
||||
`hosts/ocaml/lib/sx_persist_store.ml` services every `persist/*` IO op against
|
||||
real on-disk storage (append-only log + separate monotonic `.seq` high-water
|
||||
@@ -323,20 +333,26 @@ real `lib/acl`.
|
||||
|
||||
## Blockers
|
||||
|
||||
### IN PROGRESS — host durable-storage adapter (the only gap to real durability)
|
||||
### CLOSED — host durable-storage adapter (real durability is live)
|
||||
|
||||
**Owner:** the `loops/host-persist` loop (`hosts/**` scope; `lib/persist/**` is
|
||||
its contract, not its code).
|
||||
|
||||
**Status (2026-06-06):** durable `persist/*` ops DONE — implemented in
|
||||
`hosts/ocaml/lib/sx_persist_store.ml`, wired into all three IO sites of
|
||||
`hosts/ocaml/bin/sx_server.ml`, acceptance + real-restart recovery green
|
||||
(`hosts/ocaml/test/persist_durable_test.sh`, 5/5). The silent-data-loss repro
|
||||
below now returns the correct values. **Remaining before CLOSED:** the blob
|
||||
adapter (`blob/put|get|has?`, see "Blobs" below). Original spec retained for
|
||||
reference.
|
||||
**Resolution (2026-06-06):** `hosts/ocaml/lib/sx_persist_store.ml` services every
|
||||
`persist/*` AND `blob/*` IO op against real on-disk storage, wired into all three
|
||||
IO sites of `hosts/ocaml/bin/sx_server.ml` (the `(eval ...)` suspension loop, the
|
||||
`cek_run_with_io` bridge, and the in-process `_cek_io_resolver`). The
|
||||
silent-data-loss repro below now returns the correct values instead of
|
||||
`(1 0 nil)`. Acceptance + real-process-restart recovery green
|
||||
(`hosts/ocaml/test/persist_durable_test.sh`, 8/8): durable round-trip,
|
||||
monotonic-seq-across-truncate, streams-survive-compaction, kv, blob
|
||||
content-addressing/idempotency, and bytes+refs surviving an actual process
|
||||
restart. Existing mock suites unaffected (blob 14/0, durable 10/0, recovery,
|
||||
against the worktree binary). A subsystem migrated per
|
||||
`lib/persist/examples/acl.sx` is now genuinely durable. Original spec retained
|
||||
below for reference.
|
||||
|
||||
**Without it, durable persistence silently drops all writes.**
|
||||
**Was:** without it, durable persistence silently dropped all writes.
|
||||
|
||||
**Symptom / minimal repro.** `persist/durable-backend` performs
|
||||
`{:op "persist/..." :args (...)}` for every storage op. Under `sx_server.exe`
|
||||
|
||||
Reference in New Issue
Block a user