diff --git a/hosts/ocaml/lib/sx_persist_store.ml b/hosts/ocaml/lib/sx_persist_store.ml index 2a7086f7..0ee616bf 100644 --- a/hosts/ocaml/lib/sx_persist_store.ml +++ b/hosts/ocaml/lib/sx_persist_store.ml @@ -45,6 +45,7 @@ let rec ensure_dir dir = let streams_dir () = Filename.concat (root_dir ()) "streams" let kv_dir () = Filename.concat (root_dir ()) "kv" +let blobs_dir () = Filename.concat (root_dir ()) "blobs" let read_file path = let ic = open_in_bin path in @@ -227,6 +228,32 @@ let do_kv_keys () = |> List.sort String.compare |> List.map (fun s -> String s)) +(* ---- blob store (content-addressed) ------------------------------------ *) +(* Same pattern as the persist ops, but a SEPARATE adapter: large objects live + in a content-addressed directory keyed by a CIDv1 (raw codec, sha2-256). + persist only ever stores the returned ref ({:cid :size :mime}), never bytes. + blob/put is idempotent — identical bytes hash to the same cid + same file. *) + +let codec_raw = 0x55 + +let blob_cid bytes = + let digest = Sx_cid.unhex (Sx_sha2.sha256_hex bytes) in + Sx_cid.cidv1 codec_raw (Sx_cid.multihash Sx_cid.mh_sha2_256 digest) + +let blob_path cid = Filename.concat (blobs_dir ()) cid + +let do_blob_put bytes = + let cid = blob_cid bytes in + let p = blob_path cid in + if not (Sys.file_exists p) then write_file_atomic p bytes; + String cid + +let do_blob_get cid = + let p = blob_path cid in + if Sys.file_exists p then String (read_file p) else Nil + +let do_blob_has cid = Bool (Sys.file_exists (blob_path cid)) + (* ---- dispatch ---------------------------------------------------------- *) let arglist = function @@ -257,4 +284,10 @@ let handle_op op args = | "persist/kv-has?" -> (match a with key :: _ -> Some (do_kv_has (str key)) | _ -> Some (Bool false)) | "persist/kv-keys" -> Some (do_kv_keys ()) + | "blob/put" -> + (match a with bytes :: _ -> Some (do_blob_put (str bytes)) | _ -> Some Nil) + | "blob/get" -> + (match a with cid :: _ -> Some (do_blob_get (str cid)) | _ -> Some Nil) + | "blob/has?" -> + (match a with cid :: _ -> Some (do_blob_has (str cid)) | _ -> Some (Bool false)) | _ -> None diff --git a/hosts/ocaml/test/persist_durable_test.sh b/hosts/ocaml/test/persist_durable_test.sh index 45b3fcbd..8f9253ea 100755 --- a/hosts/ocaml/test/persist_durable_test.sh +++ b/hosts/ocaml/test/persist_durable_test.sh @@ -46,6 +46,7 @@ PRELUDE='(epoch 1) (load "lib/persist/log.sx") (load "lib/persist/kv.sx") (load "lib/persist/durable.sx") +(load "lib/persist/blob.sx") (epoch 2)' # run_eval : prints the final (ok-len 2 ...) payload line. @@ -113,6 +114,31 @@ GOT=$(run_eval "$(q '(let ((b (persist/durable-backend))) (persist/kv-get b "survive")))')") check "recovered after restart" "$GOT" '(2 2 2 "yes")' +echo "== blob: content-addressed put/get/has? round-trip ==" +GOT=$(run_eval "$(q '(let ((bs (persist/blob-store-backend))) + (let ((r (persist/blob-store bs "hello world" "text/plain"))) + (list (persist/blob-size r) + (persist/blob-mime r) + (persist/blob-fetch bs r) + (persist/blob-exists? bs r))))')") +check "blob size/mime/fetch/exists" "$GOT" '(11 "text/plain" "hello world" true)' + +echo "== blob: put is content-addressed (idempotent cid) ==" +GOT=$(run_eval "$(q '(let ((bs (persist/blob-store-backend))) + (equal? (persist/blob-cid (persist/blob-store bs "same bytes" "x")) + (persist/blob-cid (persist/blob-store bs "same bytes" "x"))))')") +check "same bytes -> same cid" "$GOT" "true" + +echo "== blob: bytes + ref-in-kv survive a REAL restart ==" +# process A: store a blob, keep only its ref in the durable kv. +run_eval "$(q '(let ((b (persist/durable-backend)) (bs (persist/blob-store-backend))) + (begin (persist/kv-put b "logo" (persist/blob-store bs "PNGDATA" "image/png")) nil))')" >/dev/null +# fresh process: read the ref from kv, fetch the bytes from the blob store. +GOT=$(run_eval "$(q '(let ((b (persist/durable-backend)) (bs (persist/blob-store-backend))) + (let ((r (persist/kv-get b "logo"))) + (list (persist/blob-fetch bs r) (persist/blob-exists? bs r) (persist/blob-mime r))))')") +check "blob recovered via ref after restart" "$GOT" '("PNGDATA" true "image/png")' + echo echo "durable adapter: $PASS passed, $FAIL failed" [ "$FAIL" -eq 0 ] diff --git a/plans/persist-on-sx.md b/plans/persist-on-sx.md index 0909969d..0fbcedc3 100644 --- a/plans/persist-on-sx.md +++ b/plans/persist-on-sx.md @@ -197,6 +197,16 @@ durable backend. Other subsystem loops copy this pattern; it does not touch the real `lib/acl`. ## Progress log +- **Host durable-storage adapter — blob adapter LIVE, Blocker CLOSED (8/8).** + Added content-addressed `blob/put|get|has?` to `sx_persist_store.ml`: bytes + land in `/blobs/` keyed by a CIDv1 (raw codec, sha2-256 via + `Sx_cid`/`Sx_sha2`); `put` is idempotent (identical bytes → same cid → same + file); persist stores only the `{:cid :size :mime}` ref, never the bytes. The + `(eval ...)`/bridge/resolver fall-through already routes any unowned op to the + store, so no new wiring was needed. `persist_durable_test.sh` extended: + blob round-trip, content-address idempotency, and bytes+ref-in-kv surviving a + real process restart — all green. Existing mock blob suite 14/0 against the + worktree binary. The host durable-storage Blocker is now CLOSED. - **Host durable-storage adapter — durable+recovery LIVE (5/5 acceptance).** `hosts/ocaml/lib/sx_persist_store.ml` services every `persist/*` IO op against real on-disk storage (append-only log + separate monotonic `.seq` high-water @@ -323,20 +333,26 @@ real `lib/acl`. ## Blockers -### IN PROGRESS — host durable-storage adapter (the only gap to real durability) +### CLOSED — host durable-storage adapter (real durability is live) **Owner:** the `loops/host-persist` loop (`hosts/**` scope; `lib/persist/**` is its contract, not its code). -**Status (2026-06-06):** durable `persist/*` ops DONE — implemented in -`hosts/ocaml/lib/sx_persist_store.ml`, wired into all three IO sites of -`hosts/ocaml/bin/sx_server.ml`, acceptance + real-restart recovery green -(`hosts/ocaml/test/persist_durable_test.sh`, 5/5). The silent-data-loss repro -below now returns the correct values. **Remaining before CLOSED:** the blob -adapter (`blob/put|get|has?`, see "Blobs" below). Original spec retained for -reference. +**Resolution (2026-06-06):** `hosts/ocaml/lib/sx_persist_store.ml` services every +`persist/*` AND `blob/*` IO op against real on-disk storage, wired into all three +IO sites of `hosts/ocaml/bin/sx_server.ml` (the `(eval ...)` suspension loop, the +`cek_run_with_io` bridge, and the in-process `_cek_io_resolver`). The +silent-data-loss repro below now returns the correct values instead of +`(1 0 nil)`. Acceptance + real-process-restart recovery green +(`hosts/ocaml/test/persist_durable_test.sh`, 8/8): durable round-trip, +monotonic-seq-across-truncate, streams-survive-compaction, kv, blob +content-addressing/idempotency, and bytes+refs surviving an actual process +restart. Existing mock suites unaffected (blob 14/0, durable 10/0, recovery, +against the worktree binary). A subsystem migrated per +`lib/persist/examples/acl.sx` is now genuinely durable. Original spec retained +below for reference. -**Without it, durable persistence silently drops all writes.** +**Was:** without it, durable persistence silently dropped all writes. **Symptom / minimal repro.** `persist/durable-backend` performs `{:op "persist/..." :args (...)}` for every storage op. Under `sx_server.exe`