diff --git a/lib/persist/blob.sx b/lib/persist/blob.sx new file mode 100644 index 00000000..97d8a5b2 --- /dev/null +++ b/lib/persist/blob.sx @@ -0,0 +1,66 @@ +; persist/blob — large objects (images, media) are NOT persist's to hold. They +; live in a content-addressed store (artdag/IPFS); persist stores only a +; reference: {:cid :size :mime}. The blob store is a SEPARATE injected +; dependency with its own transport (perform in production, a mock content store +; in tests), distinct from the event/kv backend. The invariant: a blob ref that +; lands in the log or kv carries the CID + metadata and never the bytes. +; Requires: lib/persist/backend.sx. + +(define persist/blob-ref (fn (cid size mime) {:mime mime :size size :cid cid})) +(define persist/blob-ref? (fn (r) (has-key? r :cid))) +(define persist/blob-cid (fn (r) (get r :cid))) +(define persist/blob-size (fn (r) (get r :size))) +(define persist/blob-mime (fn (r) (get r :mime))) + +; blob store protocol over an injectable transport: a dict of :put / :get / :has? fns, each forwarding a {:op :args} request ("blob/put", "blob/get", "blob/has?") to transport and returning its result +(define persist/blob-io (fn (transport) {:put (fn (bytes mime) (transport {:op "blob/put" :args (list bytes mime)})) :get (fn (cid) (transport {:op "blob/get" :args (list cid)})) :has? (fn (cid) (transport {:op "blob/has?" :args (list cid)}))})) + +; production blob store — transport is the kernel's perform +(define + persist/blob-store-backend + (fn () (persist/blob-io (fn (req) (perform req))))) + +; store bytes via the blob backend; return ONLY the ref (cid + metadata) — this +; is what the caller persists in the log/kv. The bytes never enter persist. :size is (len bytes); :cid is whatever the store's :put returns. +(define + persist/blob-store + (fn + (blob bytes mime) + (let + ((cid ((get blob :put) bytes mime))) + (persist/blob-ref cid (len bytes) mime)))) + +(define + persist/blob-fetch + (fn (blob ref) ((get blob :get) (persist/blob-cid ref)))) +(define + persist/blob-exists? + (fn (blob ref) ((get blob :has?) (persist/blob-cid ref)))) + +; mock content-addressed store (stands in for artdag/IPFS). 
CID is a +; deterministic content address: identical bytes dedupe to one CID. A real +; store computes a SHA3/IPFS CID host-side; the prefix keeps the mock readable. NOTE(review): the mock CID is built as (str "cid:" bytes), so it appears to carry the payload verbatim inside :cid — mock-only; do not read a mock ref as evidence that refs are byte-free. +(define persist/blob-cid-of (fn (bytes) (str "cid:" bytes))) + +(define + persist/blob-serve + (fn + (store req) + (let + ((op (get req :op)) (args (get req :args))) + (cond + ((equal? op "blob/put") + (let + ((cid (persist/blob-cid-of (first args)))) + (begin (persist/backend-kv-put store cid (first args)) cid))) + ((equal? op "blob/get") (persist/backend-kv-get store (first args))) + ((equal? op "blob/has?") + (persist/backend-kv-has? store (first args))) + (else (error (str "persist/blob-serve: unknown op " op))))))) + +(define + persist/blob-mock-transport + (fn (store) (fn (req) (persist/blob-serve store req)))) +(define + persist/mock-blob + (fn (store) (persist/blob-io (persist/blob-mock-transport store)))) diff --git a/lib/persist/conformance.sh b/lib/persist/conformance.sh index 17904f3b..847e2af1 100755 --- a/lib/persist/conformance.sh +++ b/lib/persist/conformance.sh @@ -13,7 +13,7 @@ if [ ! 
-x "$SX_SERVER" ]; then exit 1 fi -SUITES=(event log kv project subscribe concurrency snapshot compaction durable) +SUITES=(event log kv project subscribe concurrency snapshot compaction durable blob) OUT_JSON="lib/persist/scoreboard.json" OUT_MD="lib/persist/scoreboard.md" @@ -36,6 +36,7 @@ run_suite() { (load "lib/persist/snapshot.sx") (load "lib/persist/compaction.sx") (load "lib/persist/durable.sx") +(load "lib/persist/blob.sx") (load "lib/persist/subscribe.sx") (load "lib/persist/api.sx") (epoch 2) diff --git a/lib/persist/scoreboard.json b/lib/persist/scoreboard.json index 8c9bd47a..c7cbfe23 100644 --- a/lib/persist/scoreboard.json +++ b/lib/persist/scoreboard.json @@ -8,9 +8,10 @@ "concurrency": {"pass": 8, "fail": 0}, "snapshot": {"pass": 11, "fail": 0}, "compaction": {"pass": 11, "fail": 0}, - "durable": {"pass": 15, "fail": 0} + "durable": {"pass": 15, "fail": 0}, + "blob": {"pass": 14, "fail": 0} }, - "total_pass": 91, + "total_pass": 105, "total_fail": 0, - "total": 91 + "total": 105 } diff --git a/lib/persist/scoreboard.md b/lib/persist/scoreboard.md index e16c81ee..a2d95c81 100644 --- a/lib/persist/scoreboard.md +++ b/lib/persist/scoreboard.md @@ -13,4 +13,5 @@ _Generated by `lib/persist/conformance.sh`_ | snapshot | 11 | 0 | 11 | | compaction | 11 | 0 | 11 | | durable | 15 | 0 | 15 | -| **Total** | **91** | **0** | **91** | +| blob | 14 | 0 | 14 | +| **Total** | **105** | **0** | **105** | diff --git a/lib/persist/tests/blob.sx b/lib/persist/tests/blob.sx new file mode 100644 index 00000000..8a2d35b4 --- /dev/null +++ b/lib/persist/tests/blob.sx @@ -0,0 +1,112 @@ +; Phase 4 — blob backend: store the ref, never the bytes. Bytes live in a +; separate content-addressed store (mock here). 
+ +(persist-test + "blob-ref carries cid" + (persist/blob-cid (persist/blob-ref "c1" 10 "image/png")) + "c1") +(persist-test + "blob-ref carries size" + (persist/blob-size (persist/blob-ref "c1" 10 "image/png")) + 10) +(persist-test + "blob-ref carries mime" + (persist/blob-mime (persist/blob-ref "c1" 10 "image/png")) + "image/png") +(persist-test + "blob-ref? true for a ref" + (persist/blob-ref? (persist/blob-ref "c1" 1 "x")) + true) +(persist-test + "blob-ref? false for a plain dict" + (persist/blob-ref? {:n 1}) + false) + +(persist-test + "store returns a ref, not the bytes" + (let + ((blob (persist/mock-blob (persist/mem-backend)))) + (persist/blob-ref? (persist/blob-store blob "PNGDATA" "image/png"))) + true) +(persist-test + "store records the byte length as size" + (let + ((blob (persist/mock-blob (persist/mem-backend)))) + (persist/blob-size (persist/blob-store blob "12345" "text/plain"))) + 5) +(persist-test + "fetch round-trips the bytes via the ref" + (let + ((blob (persist/mock-blob (persist/mem-backend)))) + (let + ((ref (persist/blob-store blob "PAYLOAD" "text/plain"))) + (persist/blob-fetch blob ref))) + "PAYLOAD") +(persist-test + "exists? true after store" + (let + ((blob (persist/mock-blob (persist/mem-backend)))) + (let + ((ref (persist/blob-store blob "X" "text/plain"))) + (persist/blob-exists? blob ref))) + true) +(persist-test + "content addressing: same bytes dedupe to same cid" + (let + ((blob (persist/mock-blob (persist/mem-backend)))) + (equal? + (persist/blob-cid (persist/blob-store blob "SAME" "text/plain")) + (persist/blob-cid (persist/blob-store blob "SAME" "text/plain")))) + true) +(persist-test + "different bytes get different cids" + (let + ((blob (persist/mock-blob (persist/mem-backend)))) + (equal? 
+ (persist/blob-cid (persist/blob-store blob "A" "text/plain")) + (persist/blob-cid (persist/blob-store blob "B" "text/plain")))) + false) + +; ---------- the invariant: persist holds the ref, never the bytes. NOTE(review): "never the bytes" is checked below only as the absence of a :bytes key; the ref's other fields are not inspected ---------- +(persist-test + "a blob ref stored in kv is a ref" + (let + ((db (persist/mock-durable (persist/mem-backend))) + (blob (persist/mock-blob (persist/mem-backend)))) + (begin + (persist/kv-put + db + "avatar" + (persist/blob-store blob "BIGIMAGE" "image/png")) + (persist/blob-ref? (persist/kv-get db "avatar")))) + true) +(persist-test + "the kv value does not contain the bytes" + (let + ((db (persist/mock-durable (persist/mem-backend))) + (blob (persist/mock-blob (persist/mem-backend)))) + (begin + (persist/kv-put + db + "avatar" + (persist/blob-store blob "BIGIMAGE" "image/png")) + (has-key? (persist/kv-get db "avatar") :bytes))) + false) +(persist-test + "a blob ref stored in the log is a ref, bytes fetched separately" + (let + ((db (persist/mock-durable (persist/mem-backend))) + (store (persist/mem-backend))) + (let + ((blob (persist/mock-blob store))) + (begin + (persist/append + db + "uploads" + "added" + 0 + (persist/blob-store blob "FILEBYTES" "application/pdf")) + (let + ((ref (persist/event-data (first (persist/read db "uploads"))))) + (list (persist/blob-ref? ref) (persist/blob-fetch blob ref)))))) + (list true "FILEBYTES")) diff --git a/plans/persist-on-sx.md b/plans/persist-on-sx.md index 70eb36da..6f9915e0 100644 --- a/plans/persist-on-sx.md +++ b/plans/persist-on-sx.md @@ -42,7 +42,7 @@ read models (feeds, indices, audit logs) update incrementally. 
## Status (rolling) -`bash lib/persist/conformance.sh` → **91/91** (Phases 1–3 done, Phase 4 in progress) +`bash lib/persist/conformance.sh` → **105/105** (Phases 1–3 done, Phase 4 in progress) ## Ground rules @@ -104,7 +104,7 @@ lib/persist/backend.sx lib/persist/api.sx ## Phase 4 — Durable backends via kernel IO - [x] file/log backend driven through `perform` (IO-suspension boundary) -- [ ] blob backend interface (store ref/CID; bytes live in artdag/IPFS) +- [x] blob backend interface (store ref/CID; bytes live in artdag/IPFS) - [ ] crash/restart replay test (mock IO platform) - [ ] migration notes for swapping mem → durable under a live subsystem @@ -113,6 +113,13 @@ feed/-log, flow store, mod/audit, search index, acl grants, identity sessions al become `persist` log or kv. Track each migration in that subsystem's plan. ## Progress log +- **Phase 4b (105/105).** `blob.sx` — large objects stay out of persist. A blob + ref is `{:cid :size :mime}`; the blob store is a SEPARATE injected dependency + (`persist/blob-io` over an injectable transport, perform in prod / mock + content store in tests). `persist/blob-store` puts bytes and returns ONLY the + ref; `persist/blob-fetch` retrieves bytes via the ref. Mock store is + content-addressed (same bytes dedupe). 14 tests; the last three cover the + invariant: a ref read back from the log/kv is a ref, never the bytes (`has-key? :bytes` is false). - **Phase 4a (91/91).** `durable.sx` — a backend whose every op crosses the kernel IO boundary via `(perform {:op "persist/..." :args (...)})`. The transport is injectable: `persist/durable-backend` uses the kernel's