sx-gitea: linear closure walk + working-tree importer (78/78 wire)
The closure walk rebuilt its seen-set with assoc — which on this kernel copies the entire hashtable per call — and stacked pending cids with concat; pack-cids then insertion-sorted the result. All three are quadratic, which surfaced the moment a real repo (4.5k files) went over the wire: a single push spent an hour in the walk. The seen-set is now a private dict mutated in place (dict-set!, the acl engine's own pattern), pending cids are cons-stacked, and packs are unsorted (order is irrelevant to the receiver). Wire suite stays 78/78; every clone/fetch/push on repo-scale histories now walks each object once. lib/gitea/import.sx: working-tree importer — file-read + http-request adapt the Phase 3 wire client to a live server (gitea/http-app); staging (deterministic commits, so an interrupted import replays to identical CIDs and resumes without re-pushing) is separate from the single delta push; pack lines that exceed the pkt limit are skipped and reported. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
91
lib/gitea/import.sx
Normal file
91
lib/gitea/import.sx
Normal file
@@ -0,0 +1,91 @@
|
||||
; lib/gitea/import.sx — working-tree importer.
|
||||
;
|
||||
; Reads files from the local filesystem (file-read), builds commits in an
|
||||
; in-memory sx-git repo, and pushes them to a live forge over the wire
|
||||
; protocol — gitea/http-app adapts the Phase 3 client (which drives any
|
||||
; dream-app-shaped fn) to the kernel's http-request primitive, so the
|
||||
; same push! that syncs two in-memory forges talks to a real server.
|
||||
;
|
||||
; Staging and pushing are SEPARATE: import-stage! adds one manifest of
|
||||
; files and commits (deterministic — fixed :time/:author/:message, so a
|
||||
; replay reproduces identical CIDs and can resume an interrupted import
|
||||
; without re-pushing), import-push! sends the delta between the remote's
|
||||
; advertised head and the local one in a single request. Pushing per
|
||||
; batch is quadratic (every push walks the full closure on both sides);
|
||||
; stage many, push once.
|
||||
;
|
||||
; Files whose pack line would exceed the pkt-line limit are skipped and
|
||||
; reported per batch (wire limit; see lib/gitea/wire.sx).
|
||||
;
|
||||
; Requires: the wire client stack (lib/gitea/{repo,access,web,wire}.sx
|
||||
; and deps) on a host with the file-read + http-request primitives.
|
||||
|
||||
; gitea/http-app — turn a base URL into a dream-app-shaped fn over real HTTP.
;   base : string prepended to each request's :target to form the URL
; => (fn (request) ...) that forwards the request dict's :method, the
;    joined URL, :headers and :body to the http-request primitive and
;    returns whatever http-request returns.
(define
  gitea/http-app
  (fn
    (base)
    (fn
      (request)
      (let
        ((url (str base (get request :target))))
        (http-request
          (get request :method)
          url
          (get request :headers)
          (get request :body))))))
|
||||
|
||||
; Importer session state. false until gitea/import-init! has run; after
; that a dict {:batch n :remote remote :repo repo}.
(define gitea/import-state false)

; gitea/import-init! — begin a fresh import session.
;   base             : base URL handed to gitea/http-app
;   token owner name : passed through to gitea/remote
; Creates an in-memory repo named "import", builds the remote, resets the
; batch counter to 0 and stores all three in gitea/import-state.
; => true
(define
  gitea/import-init!
  (fn
    (base token owner name)
    (let
      ((local-repo (git/init! (persist/mem-backend) "import"))
       (remote (gitea/remote (gitea/http-app base) owner name token)))
      (begin
        (set! gitea/import-state {:batch 0 :remote remote :repo local-repo})
        true))))
|
||||
|
||||
; gitea/import-lines — read the file at `path` and return its lines.
; The content is split on "\n" and empty-string lines are dropped, so
; blank lines and a trailing newline produce no entries.
(define
  gitea/import-lines
  (fn
    (path)
    (let
      ((raw-lines (split (file-read path) "\n")))
      (filter (fn (line) (not (= line ""))) raw-lines))))
|
||||
|
||||
; gitea/import-fits? — would a blob holding `data` fit in one pkt line?
; Builds a candidate line ("x" followed by the serialized blob) and asks
; gitea/pkt-fits? about it. Serialization escapes the content, which can
; double its size, so the check runs on the serialized form rather than
; on the raw data.
; NOTE(review): the "x" presumably stands in for the pack line's prefix —
; confirm against lib/gitea/wire.sx.
(define
  gitea/import-fits?
  (fn
    (data)
    (let
      ((pack-line (str "x" (serialize (git/blob data)))))
      (gitea/pkt-fits? pack-line))))
|
||||
|
||||
; gitea/import-stage! — read and stage one manifest, then commit. NO push.
;   root     : directory prefix; each file is read from root + "/" + path
;   manifest : path of a file listing one relative path per line
; Every listed file is read. Those that pass gitea/import-fits? are added
; to the session repo; the rest are collected, in manifest order, as
; skipped. One commit is then made with a fixed message, :time equal to
; the batch number and a fixed author, and the batch counter in
; gitea/import-state is advanced.
; => {:batch n :files k :skipped (paths) :cid commit-cid}
(define
  gitea/import-stage!
  (fn
    (root manifest)
    (let
      ((state gitea/import-state))
      (let
        ((paths (gitea/import-lines manifest))
         (repo (get state :repo))
         (batch-no (+ 1 (get state :batch))))
        (let
          ((skipped
             (reduce
               (fn
                 (too-big path)
                 (let
                   ((data (file-read (str root "/" path))))
                   (if
                     (gitea/import-fits? data)
                     ; fits: stage it, skipped list unchanged
                     (begin (git/add! repo path data) too-big)
                     ; too large for one pkt line: record and move on
                     (append too-big (list path)))))
               (list)
               paths)))
          (let
            ((cid (git/commit! repo {:message (str "import rose-ash: batch " batch-no) :time batch-no :author "giles"})))
            (begin
              (set! gitea/import-state (assoc state :batch batch-no))
              {:batch batch-no :files (- (len paths) (len skipped)) :skipped skipped :cid cid})))))))
|
||||
|
||||
; gitea/import-push! — push the session repo's "heads/main" to the
; session remote in one gitea/push! call, covering everything staged
; since the remote's advertised head.
; => whatever gitea/push! returns
(define
  gitea/import-push!
  (fn
    ()
    (let
      ((state gitea/import-state))
      (let
        ((remote (get state :remote))
         (repo (get state :repo)))
        (gitea/push! remote repo "heads/main")))))
|
||||
|
||||
; gitea/import-batch! — stage one manifest and push straight away
; (fine for small imports).
; => the gitea/import-stage! result with the push result added under :push
(define
  gitea/import-batch!
  (fn
    (root manifest)
    (let
      ((staged (gitea/import-stage! root manifest)))
      (let
        ((pushed (gitea/import-push!)))
        (assoc staged :push pushed)))))
|
||||
@@ -16,6 +16,13 @@
|
||||
; clone! / fetch! / push! / push-delete! — two in-memory forges can sync
|
||||
; with no sockets anywhere.
|
||||
;
|
||||
; Scale notes: the closure walk mutates a PRIVATE seen-dict in place
|
||||
; (dict-set!) and stacks pending cids with cons — `assoc` copies the
|
||||
; whole hashtable per call and list concat copies its head, either of
|
||||
; which makes a 10k-object walk quadratic. Pack enumeration is unsorted
|
||||
; for the same reason (artdag/sort-strings is an insertion sort; pack
|
||||
; order is irrelevant to the receiver).
|
||||
;
|
||||
; Limits: one object per pkt line => objects over ~64KB need side-band
|
||||
; chunking (future extension); gitea/pkt-fits? reports this.
|
||||
;
|
||||
@@ -117,7 +124,8 @@
|
||||
((git/tag? obj) (list (git/tag-target obj)))
|
||||
(else (list)))))
|
||||
|
||||
; walk from pending cids; returns {:seen {cid true} :missing (cids)}
|
||||
; walk from pending cids; returns {:seen {cid true} :missing (cids)}.
|
||||
; `seen` must be a PRIVATE dict — it is mutated in place.
|
||||
(define
|
||||
gitea/closure-walk
|
||||
(fn
|
||||
@@ -135,11 +143,16 @@
|
||||
(if
|
||||
(nil? obj)
|
||||
(gitea/closure-walk grepo more seen (cons cid missing))
|
||||
(gitea/closure-walk
|
||||
grepo
|
||||
(concat (gitea/obj-refs obj) more)
|
||||
(assoc seen cid true)
|
||||
missing))))))))
|
||||
(begin
|
||||
(dict-set! seen cid true)
|
||||
(gitea/closure-walk
|
||||
grepo
|
||||
(reduce
|
||||
(fn (acc r) (cons r acc))
|
||||
more
|
||||
(gitea/obj-refs obj))
|
||||
seen
|
||||
missing)))))))))
|
||||
|
||||
(define
|
||||
gitea/closure
|
||||
@@ -158,7 +171,7 @@
|
||||
(empty?
|
||||
(get (gitea/closure-walk grepo cids {} (list)) :missing))))
|
||||
|
||||
; objects needed to bring someone with `haves` up to `wants`
|
||||
; objects needed to bring someone with `haves` up to `wants` (unsorted)
|
||||
(define
|
||||
gitea/pack-cids
|
||||
(fn
|
||||
@@ -167,7 +180,7 @@
|
||||
((have-set (gitea/closure grepo haves)))
|
||||
(filter
|
||||
(fn (c) (not (get have-set c)))
|
||||
(gitea/closure-list grepo wants)))))
|
||||
(keys (gitea/closure grepo wants))))))
|
||||
|
||||
; ── wire object encoding ─────────────────────────────────────────────
|
||||
|
||||
@@ -385,7 +398,7 @@
|
||||
|
||||
; ── client ───────────────────────────────────────────────────────────
|
||||
; A remote is any dream app fn plus repo coordinates and a token — the
|
||||
; same code drives an in-memory forge or (later) a real HTTP transport.
|
||||
; same code drives an in-memory forge or a real HTTP transport.
|
||||
|
||||
(define gitea/remote (fn (app owner name token) {:name name :token token :owner owner :app app}))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user