From 8ed44f7770231c71ba588343df6a2f688848156d Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 4 Jul 2026 00:30:59 +0000 Subject: [PATCH] =?UTF-8?q?lib/gitea:=20fix=20the=20fetch-pack-over-HTTP?= =?UTF-8?q?=20hang=20=E2=80=94=20native=20parse=20fast=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sx-forge native-loop blocker: clone! of the live giles/rose-ash never returned over gitea/http-app. Root cause was NOT the transport — pack-line-parse ran every pack line through the interpreted spec parser (~6.6KB/s on the CEK machine; a full-repo pack = hours), and a non-hex byte in a pkt length header parsed negative (index-of -1), walking the scan index backwards forever. - gitea/parse-obj: use the host reader (open-input-string + read, ~3700x faster, value-identical) when the host provides it; hosts without string ports keep sx-parse. Feature-detected at load. - pkt-sections-loop: (< n 4) guard — malformed lengths error instead of hanging. - push-cmd!: haves = every advertised remote ref held locally, so a NEW branch pushes only its delta, not the whole repo closure. - tests/wire.sx: malformed-len errors, truncated-pkt clamps, parse-obj = sx-parse equivalence (blob/commit + cid). 83/83. - tests/wire-http.sh + wire-http-client.sx: end-to-end over REAL http-listen/http-request on :8943 — ls-remote/clone/push-new-branch/ fresh-clone-verify/delete. The coverage gap that hid all this. Proven vs the live forge (in sx-gitea-1): full 4468-file clone in 77s (was: hang), commit, push heads/sx-smoke-test ok, branch advertised on sx.sx-web.org. Conformance 620/620. Co-Authored-By: Claude Fable 5 --- lib/gitea/scoreboard.json | 6 +- lib/gitea/scoreboard.md | 4 +- lib/gitea/tests/wire-http-client.sx | 84 +++++++++++++++++++++++++ lib/gitea/tests/wire-http.sh | 95 +++++++++++++++++++++++++++++ lib/gitea/tests/wire.sx | 31 ++++++++++ lib/gitea/wire.sx | 60 ++++++++++++------ 6 files changed, 257 insertions(+), 23 deletions(-) create mode 100644 lib/gitea/tests/wire-http-client.sx create mode 100755 lib/gitea/tests/wire-http.sh diff --git a/lib/gitea/scoreboard.json b/lib/gitea/scoreboard.json index b949e667..a46883f1 100644 --- a/lib/gitea/scoreboard.json +++ b/lib/gitea/scoreboard.json @@ -2,14 +2,14 @@ "suites": { "repo": {"pass": 91, "fail": 0}, "access": {"pass": 103, "fail": 0}, - "wire": {"pass": 78, "fail": 0}, + "wire": {"pass": 83, "fail": 0}, "issues": {"pass": 88, "fail": 0}, "pr": {"pass": 100, "fail": 0}, "activity": {"pass": 60, "fail": 0}, "search": {"pass": 35, "fail": 0}, "fed": {"pass": 60, "fail": 0} }, - "total_pass": 615, + "total_pass": 620, "total_fail": 0, - "total": 615 + "total": 620 } diff --git a/lib/gitea/scoreboard.md b/lib/gitea/scoreboard.md index 7da60b68..86028682 100644 --- a/lib/gitea/scoreboard.md +++ b/lib/gitea/scoreboard.md @@ -6,10 +6,10 @@ _Generated by `lib/gitea/conformance.sh`_ |-------|-----:|-----:|------:| | repo | 91 | 0 | 91 | | access | 103 | 0 | 103 | -| wire | 78 | 0 | 78 | +| wire | 83 | 0 | 83 | | issues | 88 | 0 | 88 | | pr | 100 | 0 | 100 | | activity | 60 | 0 | 60 | | search | 35 | 0 | 35 | | fed | 60 | 0 | 60 | -| **Total** | **615** | **0** | **615** | +| **Total** | **620** | **0** | **620** | diff --git a/lib/gitea/tests/wire-http-client.sx b/lib/gitea/tests/wire-http-client.sx new file mode 100644 index 00000000..00bcbf8f --- /dev/null +++ b/lib/gitea/tests/wire-http-client.sx @@ -0,0 +1,84 @@ +; lib/gitea/tests/wire-http-client.sx — client half of wire-http.sh. +; +; Drives the wire client against a REAL http-listen server through +; gitea/http-app (the kernel http-request primitive) — the transport +; the in-process wire tests can't cover. Asserts land in +; /tmp/sx-wire-http-test/results.txt as "ok " / "ng ..." +; lines; wire-http.sh greps for "ng ". +; +; Requires: the wire client stack (see wire-http.sh) + lib/gitea/import.sx +; (gitea/http-app), served forge on 127.0.0.1:8943 (giles/welcome seeded). + +(define whc-out "/tmp/sx-wire-http-test/results.txt") +(define whc-results (list)) + +(define + whc-check + (fn + (name ok detail) + (begin + (set! + whc-results + (append + whc-results + (list + (if ok (str "ok " name) (str "ng " name " " (inspect detail)))))) + (file-write whc-out (join "\n" whc-results))))) + +(define whc-base "http://127.0.0.1:8943") +(define whc-token "wire-test-token") +(define whc-app (gitea/http-app whc-base)) + +; 1. ls-remote over real HTTP +(define whc-remote (gitea/remote whc-app "giles" "welcome" whc-token)) +(define whc-ls (gitea/ls-remote whc-remote)) +(whc-check "ls-remote head" (= (get whc-ls :head) "heads/main") whc-ls) + +; 2. clone over real HTTP +(define whc-forge (gitea/forge (persist/mem-backend))) +(gitea/user-create! whc-forge "giles") +(define + whc-clone + (gitea/clone! whc-forge "giles" "welcome" whc-remote {})) +(whc-check + "clone returns repo" + (= (get whc-clone :name) "welcome") + whc-clone) +(define whc-g (gitea/repo-git whc-forge "giles" "welcome")) +(define whc-files (git/commit-files whc-g (git/head whc-g))) +(whc-check + "clone materializes README" + (not (nil? (get whc-files "README.md"))) + (keys whc-files)) + +; 3. commit + push a NEW branch over real HTTP — push-cmd! with the +; commit cid (no local branch ref needed; delta-sized thanks to the +; remote-refs haves) +(git/add! whc-g "wire-http-test.txt" "pushed over real HTTP") +(define whc-cid (git/commit! whc-g {:message "wire-http round trip" :author "giles" :time 1})) +(define + whc-push + (gitea/push-cmd! whc-remote whc-g "heads/wire-http-test" whc-cid)) +(whc-check "push new branch" (= (get whc-push :ok) true) whc-push) + +; 4. fresh clone sees the pushed branch + file +(define whc-forge2 (gitea/forge (persist/mem-backend))) +(gitea/user-create! whc-forge2 "giles") +(gitea/clone! whc-forge2 "giles" "welcome" whc-remote {}) +(define whc-g2 (gitea/repo-git whc-forge2 "giles" "welcome")) +(define whc-branch (git/branch-get whc-g2 "wire-http-test")) +(whc-check + "fresh clone sees pushed branch" + (not (nil? whc-branch)) + (git/branches whc-g2)) +(define whc-files2 (git/commit-files whc-g2 whc-branch)) +(whc-check + "pushed file round-trips" + (= (get whc-files2 "wire-http-test.txt") "pushed over real HTTP") + (keys whc-files2)) + +; 5. delete the test branch again (leave the served forge clean) +(define whc-del (gitea/push-delete! whc-remote whc-g "heads/wire-http-test")) +(whc-check "delete test branch" (= (get whc-del :ok) true) whc-del) + +"WIRE-HTTP-CLIENT-DONE" \ No newline at end of file diff --git a/lib/gitea/tests/wire-http.sh b/lib/gitea/tests/wire-http.sh new file mode 100755 index 00000000..ae928208 --- /dev/null +++ b/lib/gitea/tests/wire-http.sh @@ -0,0 +1,95 @@ +#!/usr/bin/env bash +# lib/gitea/tests/wire-http.sh — end-to-end wire test over REAL HTTP. +# +# The in-process wire suite (tests/wire.sx) drives the client against +# gitea/forge-app — a function call, no sockets. This script covers the +# transport the suite can't: a served forge (kernel http-listen) and a +# client whose remote is gitea/http-app (kernel http-request). This is +# the path that carries clone/fetch/push between real machines — and +# the gap that hid the fetch-pack "hang" (interpreted-parser slowness). +# +# Serves on 127.0.0.1:8943 with a throwaway persist dir, runs +# wire-http-client.sx (ls-remote / clone / push branch / fresh-clone +# verify / delete branch), then greps the results for failures. +# +# Usage: bash lib/gitea/tests/wire-http.sh # exit 0 = pass + +set -uo pipefail +cd "$(git rev-parse --show-toplevel)" + +SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}" +[ -x "$SX_SERVER" ] || SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe" +if [ ! -x "$SX_SERVER" ]; then + echo "ERROR: sx_server.exe not found." >&2 + exit 1 +fi + +PORT=8943 +WORK=/tmp/sx-wire-http-test +rm -rf "$WORK" +mkdir -p "$WORK/persist" + +# ── serve a throwaway forge ────────────────────────────────────────── +HOST_PORT=$PORT SX_INSTANCE=localhost SX_GITEA_ADMIN=giles \ +SX_GITEA_TOKEN=wire-test-token SX_PERSIST_DIR="$WORK/persist" \ +SX_HTTP_HOST=127.0.0.1 \ +timeout 600 bash lib/gitea/serve.sh > "$WORK/server.log" 2>&1 & +SERVER_PID=$! +cleanup() { kill "$SERVER_PID" 2>/dev/null; } +trap cleanup EXIT + +# wait for the ref advertisement to come up (server boots ~10s) +for i in $(seq 1 60); do + if curl -s -m 2 "http://127.0.0.1:$PORT/giles/welcome/info/refs" \ + -H "Authorization: Bearer wire-test-token" | grep -q "heads/main"; then + break + fi + sleep 2 +done +if ! curl -s -m 2 "http://127.0.0.1:$PORT/giles/welcome/info/refs" \ + -H "Authorization: Bearer wire-test-token" | grep -q "heads/main"; then + echo "FAIL: server did not come up on :$PORT" >&2 + tail -5 "$WORK/server.log" | tr -d '\000' >&2 + exit 1 +fi + +# ── run the client ─────────────────────────────────────────────────── +# wire client stack only (no web UI extras): kernel + parser + persist + +# sx-git + datalog/acl + dream + gitea repo/access/web/wire + import. +MODULES=( + "spec/stdlib.sx" "spec/parser.sx" "lib/r7rs.sx" + "lib/persist/event.sx" "lib/persist/backend.sx" "lib/persist/log.sx" "lib/persist/kv.sx" + "lib/artdag/dag.sx" + "lib/git/object.sx" "lib/git/ref.sx" "lib/git/dag.sx" "lib/git/worktree.sx" + "lib/git/diff.sx" "lib/git/merge.sx" "lib/git/porcelain.sx" + "lib/datalog/tokenizer.sx" "lib/datalog/parser.sx" "lib/datalog/unify.sx" + "lib/datalog/db.sx" "lib/datalog/builtins.sx" "lib/datalog/aggregates.sx" + "lib/datalog/strata.sx" "lib/datalog/eval.sx" "lib/datalog/api.sx" "lib/datalog/magic.sx" + "lib/acl/schema.sx" "lib/acl/facts.sx" "lib/acl/engine.sx" + "lib/dream/types.sx" "lib/dream/router.sx" "lib/dream/middleware.sx" "lib/dream/error.sx" + "lib/dream/html.sx" "lib/dream/json.sx" "lib/dream/auth.sx" "lib/dream/api.sx" + "lib/gitea/repo.sx" "lib/gitea/access.sx" "lib/gitea/web.sx" "lib/gitea/wire.sx" + "lib/gitea/import.sx" + "lib/gitea/tests/wire-http-client.sx" +) +E=1 +{ + for M in "${MODULES[@]}"; do + echo "(epoch $E)"; echo "(load \"$M\")"; E=$((E+1)) + done +} | timeout 300 "$SX_SERVER" > "$WORK/client.log" 2>&1 + +# ── report ─────────────────────────────────────────────────────────── +if [ ! -f "$WORK/results.txt" ]; then + echo "FAIL: client produced no results (see $WORK/client.log)" >&2 + tr -d '\000' < "$WORK/client.log" | tail -5 >&2 + exit 1 +fi +cat "$WORK/results.txt" +echo +if grep -q "^ng " "$WORK/results.txt"; then + echo "wire-http: FAIL" >&2 + exit 1 +fi +TOTAL=$(grep -c "^ok " "$WORK/results.txt") +echo "wire-http: PASS ($TOTAL checks)" diff --git a/lib/gitea/tests/wire.sx b/lib/gitea/tests/wire.sx index f944fa79..93fc4187 100644 --- a/lib/gitea/tests/wire.sx +++ b/lib/gitea/tests/wire.sx @@ -488,3 +488,34 @@ {}) :error) 404) + +(gitea-wire-test + "pkt-sections malformed len errors" + (guard (e (true "errored")) (gitea/pkt-sections "zzzzzzzz")) + "errored") + +(gitea-wire-test + "pkt-sections truncated pkt clamps" + (guard (e (true "errored")) (gitea/pkt-sections "0009hi")) + (list (list "hi"))) + +(define gw-po-blob (serialize (git/blob "hello \"quoted\" \n multiline"))) + +(define + gw-po-commit + (serialize (git/commit "sx1:tree" (list "sx1:p1" "sx1:p2") {:message "m" :author "a" :time 42}))) + +(gitea-wire-test + "parse-obj blob = sx-parse" + (gitea/parse-obj gw-po-blob) + (first (sx-parse gw-po-blob))) + +(gitea-wire-test + "parse-obj commit = sx-parse" + (gitea/parse-obj gw-po-commit) + (first (sx-parse gw-po-commit))) + +(gitea-wire-test + "parse-obj cid stable" + (git/cid (gitea/parse-obj gw-po-commit)) + (git/cid (first (sx-parse gw-po-commit)))) diff --git a/lib/gitea/wire.sx b/lib/gitea/wire.sx index 63934c08..f67704ee 100644 --- a/lib/gitea/wire.sx +++ b/lib/gitea/wire.sx @@ -80,7 +80,10 @@ (fn (lines) (str (join "" (map gitea/pkt lines)) gitea/pkt-flush)) sections)))) -; parse framed data into sections (lists of lines) split on flush pkts +; parse framed data into sections (lists of lines) split on flush pkts. +; A non-hex byte in a length header parses negative (index-of -> -1); +; the (< n 4) guard makes that an error — without it i walks backwards +; and the loop never terminates (a malformed body must not hang). (define gitea/pkt-sections-loop (fn @@ -90,18 +93,21 @@ (reverse (if (empty? cur) sections (cons (reverse cur) sections))) (let ((n (gitea/hex4-parse (substr data i 4)))) - (if - (= n 0) - (gitea/pkt-sections-loop - data - (+ i 4) - (list) - (cons (reverse cur) sections)) - (gitea/pkt-sections-loop - data - (+ i n) - (cons (substr data (+ i 4) (- n 4)) cur) - sections)))))) + (cond + ((= n 0) + (gitea/pkt-sections-loop + data + (+ i 4) + (list) + (cons (reverse cur) sections))) + ((< n 4) + (error (str "gitea/pkt-sections: malformed pkt-len at " i))) + (else + (gitea/pkt-sections-loop + data + (+ i n) + (cons (substr data (+ i 4) (- n 4)) cur) + sections))))))) (define gitea/pkt-sections @@ -188,6 +194,18 @@ gitea/pack-line (fn (grepo cid) (str cid " " (serialize (git/read grepo cid))))) +; parse one serialized object. sx-parse (the spec parser) is interpreted — +; ~6KB/s on the CEK machine, which turns a full-repo pack into hours; the +; host reader (open-input-string + read) parses the same grammar natively +; (~3700x faster, value-identical). Feature-detect at load: hosts without +; string ports keep the spec parser. +(define + gitea/parse-obj + (if + (and (primitive? "open-input-string") (primitive? "read")) + (fn (s) (read (open-input-string s))) + (fn (s) (first (sx-parse s))))) + ; parse " " and verify the cid matches the bytes (define gitea/pack-line-parse @@ -200,7 +218,7 @@ {:error "malformed"} (let ((cid (substr line 0 sp)) - (obj (first (sx-parse (substr line (+ sp 1)))))) + (obj (gitea/parse-obj (substr line (+ sp 1))))) (if (= (git/cid obj) cid) {:obj obj :cid cid} {:error "cid-mismatch" :cid cid})))))) ; verify + store every pack line; => {:stored n} | {:error ...} @@ -400,7 +418,7 @@ ; A remote is any dream app fn plus repo coordinates and a token — the ; same code drives an in-memory forge or a real HTTP transport. -(define gitea/remote (fn (app owner name token) {:name name :token token :owner owner :app app})) +(define gitea/remote (fn (app owner name token) {:name name :owner owner :token token :app app})) (define gitea/remote-call @@ -535,9 +553,15 @@ (nil? ls) {:error 404} (let - ((old (or (get (get ls :refs) refname) gitea/zero-ref))) + ((old (or (get (get ls :refs) refname) gitea/zero-ref)) + (haves + (filter + (fn (c) (git/has? grepo c)) + (map + (fn (k) (get (get ls :refs) k)) + (keys (get ls :refs)))))) (let - ((pack (if (equal? new gitea/zero-ref) (list) (gitea/pack-cids grepo (list new) (if (equal? old gitea/zero-ref) (list) (list old)))))) + ((pack (if (equal? new gitea/zero-ref) (list) (gitea/pack-cids grepo (list new) haves)))) (let ((resp (gitea/remote-call remote "POST" "/git-receive-pack" (gitea/pkt-render (list (list (str old " " new " " refname)) (map (fn (c) (gitea/pack-line grepo c)) pack)))))) (if @@ -564,4 +588,4 @@ gitea/push-delete! (fn (remote grepo refname) - (gitea/push-cmd! remote grepo refname gitea/zero-ref))) + (gitea/push-cmd! remote grepo refname gitea/zero-ref))) \ No newline at end of file