diff --git a/lib/gitea/import.sx b/lib/gitea/import.sx
new file mode 100644
index 00000000..15d9e433
--- /dev/null
+++ b/lib/gitea/import.sx
@@ -0,0 +1,91 @@
+; lib/gitea/import.sx — working-tree importer.
+;
+; Reads files from the local filesystem (file-read), builds commits in an
+; in-memory sx-git repo, and pushes them to a live forge over the wire
+; protocol — gitea/http-app adapts the Phase 3 client (which drives any
+; dream-app-shaped fn) to the kernel's http-request primitive, so the
+; same push! that syncs two in-memory forges talks to a real server.
+;
+; Staging and pushing are SEPARATE: import-stage! adds one manifest of
+; files and commits. Commits are deterministic — :time and :message come
+; from the batch number and :author is constant — so a replay reproduces
+; identical CIDs and can resume an interrupted import without re-pushing.
+; import-push! sends the delta between the remote's advertised head and
+; the local one in a single request. Pushing per batch is quadratic (every
+; push walks the full closure on both sides); stage many, push once.
+;
+; Files whose pack line would exceed the pkt-line limit are skipped (never
+; staged) and reported per batch (wire limit; see lib/gitea/wire.sx).
+;
+; Requires: the wire client stack (lib/gitea/{repo,access,web,wire}.sx
+; and deps) on a host with the file-read + http-request primitives.
+
+; a dream-app-shaped fn over real HTTP: request dict in, response dict out
+(define
+  gitea/http-app
+  (fn
+    (base) ; prefix prepended to every request's :target
+    (fn
+      (req) ; dict read through :method, :target, :headers and :body
+      (http-request
+        (get req :method)
+        (str base (get req :target)) ; plain concatenation — no separator is inserted
+        (get req :headers)
+        (get req :body)))))
+
+(define gitea/import-state false) ; false until import-init! installs {:batch :remote :repo}
+
+(define
+  gitea/import-init! ; starts a fresh session (replaces any earlier state); returns true
+  (fn
+    (base token owner name) ; base feeds gitea/http-app; owner, name, token go to gitea/remote
+    (let
+      ((repo (git/init! (persist/mem-backend) "import")))
+      (begin (set! gitea/import-state {:batch 0 :remote (gitea/remote (gitea/http-app base) owner name token) :repo repo}) true))))
+
+(define
+  gitea/import-lines ; the non-empty lines of the file at `path`
+  (fn
+    (path)
+    (filter (fn (l) (not (= l ""))) (split (file-read path) "\n")))) ; NOTE(review): lines are not trimmed, so a CRLF manifest keeps "\r" on each path
+
+; will this file's pack line fit one pkt? checks "x" + the serialized blob, not the raw data (escaping can double the size)
+(define
+  gitea/import-fits?
+  (fn (data) (gitea/pkt-fits? (str "x" (serialize (git/blob data))))))
+
+; read + stage one manifest of paths and commit — NO push
+; => {:batch n :files k :skipped (paths) :cid commit-cid}
+(define
+  gitea/import-stage!
+  (fn
+    (root manifest) ; manifest: file of paths, one per line; each is read from root "/" path
+    (let
+      ((st gitea/import-state)) ; NOTE(review): no check that import-init! has run — st is false before it
+      (let
+        ((paths (gitea/import-lines manifest))
+          (repo (get st :repo))
+          (n (+ 1 (get st :batch)))) ; this batch's number: previous counter + 1
+        (let ; fold over paths: git/add! the files that fit, collect the rest in manifest order
+          ((skipped (reduce (fn (acc p) (let ((data (file-read (str root "/" p)))) (if (gitea/import-fits? data) (begin (git/add! repo p data) acc) (append acc (list p))))) (list) paths)))
+          (let ; the commit is unconditional; :time is the batch number, not a clock reading
+            ((cid (git/commit! repo {:message (str "import rose-ash: batch " n) :time n :author "giles"})))
+            (begin (set! gitea/import-state (assoc st :batch n)) {:batch n :files (- (len paths) (len skipped)) :skipped skipped :cid cid}))))))) ; counter is stored only after git/commit! has returned
+
+; one delta push of everything staged since the remote's advertised head
+(define
+  gitea/import-push!
+  (fn
+    ()
+    (let
+      ((st gitea/import-state)) ; NOTE(review): same unchecked dependency on import-init!
+      (gitea/push! (get st :remote) (get st :repo) "heads/main")))) ; the ref name is hard-coded
+
+; stage + push in one step (fine for small imports)
+(define
+  gitea/import-batch!
+  (fn
+    (root manifest) ; passed straight through to gitea/import-stage!
+    (let
+      ((res (gitea/import-stage! root manifest)))
+      (assoc res :push (gitea/import-push!))))) ; the stage result plus :push = whatever import-push! returned
diff --git a/lib/gitea/wire.sx b/lib/gitea/wire.sx
index 90e6ee6b..63934c08 100644
--- a/lib/gitea/wire.sx
+++ b/lib/gitea/wire.sx
@@ -16,6 +16,13 @@
 ; clone! / fetch! / push! / push-delete! — two in-memory forges can sync
 ; with no sockets anywhere.
 ;
+; Scale notes: the closure walk mutates a PRIVATE seen-dict in place
+; (dict-set!)
and stacks pending cids with cons — `assoc` copies the +; whole hashtable per call and list concat copies its head, either of +; which makes a 10k-object walk quadratic. Pack enumeration is unsorted +; for the same reason (artdag/sort-strings is an insertion sort; pack +; order is irrelevant to the receiver). +; ; Limits: one object per pkt line => objects over ~64KB need side-band ; chunking (future extension); gitea/pkt-fits? reports this. ; @@ -117,7 +124,8 @@ ((git/tag? obj) (list (git/tag-target obj))) (else (list))))) -; walk from pending cids; returns {:seen {cid true} :missing (cids)} +; walk from pending cids; returns {:seen {cid true} :missing (cids)}. +; `seen` must be a PRIVATE dict — it is mutated in place. (define gitea/closure-walk (fn @@ -135,11 +143,16 @@ (if (nil? obj) (gitea/closure-walk grepo more seen (cons cid missing)) - (gitea/closure-walk - grepo - (concat (gitea/obj-refs obj) more) - (assoc seen cid true) - missing)))))))) + (begin + (dict-set! seen cid true) + (gitea/closure-walk + grepo + (reduce + (fn (acc r) (cons r acc)) + more + (gitea/obj-refs obj)) + seen + missing))))))))) (define gitea/closure @@ -158,7 +171,7 @@ (empty? (get (gitea/closure-walk grepo cids {} (list)) :missing)))) -; objects needed to bring someone with `haves` up to `wants` +; objects needed to bring someone with `haves` up to `wants` (unsorted) (define gitea/pack-cids (fn @@ -167,7 +180,7 @@ ((have-set (gitea/closure grepo haves))) (filter (fn (c) (not (get have-set c))) - (gitea/closure-list grepo wants))))) + (keys (gitea/closure grepo wants)))))) ; ── wire object encoding ───────────────────────────────────────────── @@ -385,7 +398,7 @@ ; ── client ─────────────────────────────────────────────────────────── ; A remote is any dream app fn plus repo coordinates and a token — the -; same code drives an in-memory forge or (later) a real HTTP transport. +; same code drives an in-memory forge or a real HTTP transport. 
(define gitea/remote (fn (app owner name token) {:name name :token token :owner owner :app app}))