From ff024d1b5d0cd713a5fe81e74dc9cd8f4286f500 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 7 Jun 2026 03:11:03 +0000 Subject: [PATCH] =?UTF-8?q?fed-sx-m2:=20Step=2010a=20=E2=80=94=20discovery?= =?UTF-8?q?=20primitives=20+=2012=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New next/kernel/discovery.erl with the local-side webfinger primitives per design §13.7: parse_acct/1(Bin) -> {ok, User, Host} | {error, _} Accepts <> (with prefix) or <> (bare). Host preserves an optional :port suffix. Rejects empty user/host and missing @. parse_resource/1 alias for the webfinger ?resource= shape actor_url_for/2(User, Host) Synthesises </actors/>>. TLS / https is v3, gated on a TLS substrate Blocker. webfinger_body/3(User, Host, ActorUrl) Builds the RFC 7033 JSON body: {"subject":"acct:@", "links":[{"rel":"self", "type":"application/activity+json", "href":""}]} Hand-rolled byte concatenation — no JSON BIF on this port. Substrate gotcha re-confirmed: <<"acct:">> string literals truncate to one byte on this port. "acct:" is spelled as <<97,99,99,116,58>> in the implementation. 12/12 in next/tests/discovery.sh covering: - parse_acct prefixed + bare forms - host with :port preserved - reject empty user / missing @ / empty host - parse_resource alias - actor_url_for synthesis + port preservation - webfinger_body prefix shape + byte_size sanity Step 10b (http_server route GET /.well-known/webfinger) and Step 10c (peer-actor fetch via Step 5's lookup_or_fetch slot) layer on top. 10c gates on Blockers #2 (native http-request primitive missing). 
--- next/kernel/discovery.erl | 98 ++++++++++++++++++++++++++++ next/tests/discovery.sh | 124 ++++++++++++++++++++++++++++++++++++ plans/fed-sx-milestone-2.md | 43 +++++++++++-- 3 files changed, 258 insertions(+), 7 deletions(-) create mode 100644 next/kernel/discovery.erl create mode 100755 next/tests/discovery.sh diff --git a/next/kernel/discovery.erl b/next/kernel/discovery.erl new file mode 100644 index 00000000..fec4c18d --- /dev/null +++ b/next/kernel/discovery.erl @@ -0,0 +1,98 @@ +-module(discovery). +-export([parse_acct/1, parse_resource/1, + actor_url_for/2, webfinger_body/3]). + +%% Discovery primitives per design §13.7. Step 10a covers the +%% local-side webfinger endpoint (responding when a peer asks +%% "where does acct:alice@here live?"); the peer-fetch direction +%% (loading a peer's actor doc lazily on first inbound) is Step 10b +%% and gates on Blockers #2 (native http-request primitive). +%% +%% parse_acct/1 — accept a binary in either form: +%% <<"acct:alice@host:port">> (full prefixed URI) +%% <<"alice@host:port">> (bare account, prefix optional) +%% Returns {ok, User, Host} | {error, Reason}. +%% +%% parse_resource/1 — the resource= query parameter from +%% /.well-known/webfinger. Same shape as parse_acct. +%% +%% actor_url_for/2(User, Host) — synthesises the canonical +%% per-actor URL `:///actors/`. v2 hardcodes +%% http://; TLS / https is v3 (Blockers gate). +%% +%% webfinger_body/3 — builds the JSON response body. + +%% ── parse_acct / parse_resource ───────────────────────────────── + +%% "acct:" -> 5 bytes: 97 99 99 116 58 +parse_acct(Bin) when is_binary(Bin) -> + AcctPrefix = <<97,99,99,116,58>>, + case strip_prefix(AcctPrefix, Bin) of + {ok, Rest} -> split_user_host(Rest); + nomatch -> split_user_host(Bin) + end; +parse_acct(_) -> {error, bad_input}. + +parse_resource(Bin) -> parse_acct(Bin). + +%% strip_prefix/2 — return {ok, Rest} when Bin starts with Prefix, +%% else nomatch. 
Substrate has no proper prefix-match BIF; this +%% byte-walks. + +strip_prefix(<<>>, Rest) -> {ok, Rest}; +strip_prefix(<>, <>) -> + strip_prefix(PRest, RRest); +strip_prefix(_, _) -> nomatch. + +%% split_user_host/1 — split a `user@host[:port]` binary at the +%% first `@`. Returns {ok, User, Host} where Host may include the +%% optional port suffix. + +split_user_host(Bin) -> + case split_at(64, Bin) of % 64 = '@' + {Before, After} when byte_size(Before) > 0, byte_size(After) > 0 -> + {ok, Before, After}; + _ -> + {error, bad_acct} + end. + +split_at(Byte, Bin) -> + split_at(Byte, Bin, <<>>). + +split_at(_, <<>>, Acc) -> + {Acc, <<>>}; +split_at(Byte, <>, Acc) -> + {Acc, Rest}; +split_at(Byte, <>, Acc) -> + split_at(Byte, Rest, <>). + +%% ── URL synthesis ────────────────────────────────────────────── + +%% "http://" -> 7 bytes | "/actors/" -> 8 bytes +actor_url_for(User, Host) -> + Pre = <<104,116,116,112,58,47,47>>, % "http://" + Mid = <<47,97,99,116,111,114,115,47>>, % "/actors/" + <
>.
+
+%% ── webfinger JSON body ────────────────────────────────────────
+%%
+%% webfinger_body(User, Host, ActorUrl) -> binary(). Mastodon-shape per RFC 7033:
+%%   {"subject":"acct:<User>@<Host>",
+%%    "links":[{"rel":"self",
+%%              "type":"application/activity+json",
+%%              "href":"<ActorUrl>"}]}
+%% followed by one trailing newline (byte 10).
+%% Hand-rolled byte concatenation — no JSON BIF on this port. The
+%% three arguments are spliced in verbatim with no JSON escaping, so
+%% the caller must have validated User + Host (Mastodon's webfinger
+%% inputs are alphanumeric + .-_ in practice).
+
+webfinger_body(User, Host, ActorUrl) ->
+    AcctPre = <<123,34,115,117,98,106,101,99,116,34,58,34,97,99,99,116,58>>,  % '{"subject":"acct:'
+    AcctAt  = <<64>>,                                                          % '@'
+    LinksHd = <<34,44,34,108,105,110,107,115,34,58,91,123,34,114,101,108,34,58,34,115,101,108,102,34,44,
+                34,116,121,112,101,34,58,34,97,112,112,108,105,99,97,116,105,111,110,47,97,99,116,
+                105,118,105,116,121,43,106,115,111,110,34,44,34,104,114,101,102,34,58,34>>,         % '","links":[{"rel":"self","type":"application/activity+json","href":"'
+    LinksTl = <<34,125,93,125,10>>,                                            % '"}]}\n'
+    <<AcctPre/binary, User/binary, AcctAt/binary, Host/binary, LinksHd/binary, ActorUrl/binary, LinksTl/binary>>.
diff --git a/next/tests/discovery.sh b/next/tests/discovery.sh
new file mode 100755
index 00000000..39fdde16
--- /dev/null
+++ b/next/tests/discovery.sh
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+# next/tests/discovery.sh — m2 Step 10a test.
+#
+# Local-side webfinger primitives: parse acct: URIs, synthesise
+# actor URLs, build the RFC 7033 webfinger JSON body.
+
+set -uo pipefail   # -e is not set: a non-zero command does not abort the script
+repo_root=$(git rev-parse --show-toplevel) && cd "$repo_root" || exit 1   # paths below are repo-relative
+
+SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
+if [ ! -x "$SX_SERVER" ]; then
+  SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"   # fallback absolute path
+fi
+if [ ! -x "$SX_SERVER" ]; then
+  echo "ERROR: sx_server.exe not found." >&2
+  exit 1
+fi
+
+VERBOSE="${1:-}"   # pass -v to print one line per passing check
+PASS=0; FAIL=0; ERRORS=""
+TMPFILE=$(mktemp) || exit 1; trap 'rm -f "$TMPFILE"' EXIT   # single quotes: expand and quote the path when the trap runs
+
+cat > "$TMPFILE" <<'EPOCHS'
+(epoch 1)
+(load "lib/erlang/tokenizer.sx")
+(load "lib/erlang/parser.sx")
+(load "lib/erlang/parser-core.sx")
+(load "lib/erlang/parser-expr.sx")
+(load "lib/erlang/parser-module.sx")
+(load "lib/erlang/transpile.sx")
+(load "lib/erlang/runtime.sx")
+(load "lib/erlang/vm/dispatcher.sx")
+(epoch 2)
+(eval "(get (erlang-load-module (file-read \"next/kernel/discovery.erl\")) :name)")
+(epoch 3)
+(eval "(get (erlang-load-module (file-read \"next/kernel/http_server.erl\")) :name)")
+
+;; parse_acct accepts the acct: prefix form
+(epoch 10)
+(eval "(get (erlang-eval-ast \"discovery:parse_acct(<<97,99,99,116,58,97,108,105,99,101,64,104,111,115,116>>) =:= {ok, <<97,108,105,99,101>>, <<104,111,115,116>>}\") :name)")
+
+;; parse_acct accepts the bare form
+(epoch 11)
+(eval "(get (erlang-eval-ast \"discovery:parse_acct(<<97,108,105,99,101,64,104,111,115,116>>) =:= {ok, <<97,108,105,99,101>>, <<104,111,115,116>>}\") :name)")
+
+;; parse_acct host with port
+(epoch 12)
+(eval "(get (erlang-eval-ast \"discovery:parse_acct(<<97,108,105,99,101,64,104,111,115,116,58,57,57,57,57>>) =:= {ok, <<97,108,105,99,101>>, <<104,111,115,116,58,57,57,57,57>>}\") :name)")
+
+;; parse_acct rejects empty user
+(epoch 13)
+(eval "(get (erlang-eval-ast \"case discovery:parse_acct(<<64,104,111,115,116>>) of {error, _} -> true; _ -> false end\") :name)")
+
+;; parse_acct rejects missing @
+(epoch 14)
+(eval "(get (erlang-eval-ast \"case discovery:parse_acct(<<97,108,105,99,101>>) of {error, _} -> true; _ -> false end\") :name)")
+
+;; parse_acct rejects empty host
+(epoch 15)
+(eval "(get (erlang-eval-ast \"case discovery:parse_acct(<<97,108,105,99,101,64>>) of {error, _} -> true; _ -> false end\") :name)")
+
+;; parse_resource is an alias for parse_acct
+(epoch 16)
+(eval "(get (erlang-eval-ast \"discovery:parse_resource(<<97,99,99,116,58,98,111,98,64,98,46,99,111,109>>) =:= {ok, <<98,111,98>>, <<98,46,99,111,109>>}\") :name)")
+
+;; actor_url_for synthesises http://<host>/actors/<user>
+(epoch 17)
+(eval "(get (erlang-eval-ast \"discovery:actor_url_for(<<97,108,105,99,101>>, <<104,111,115,116>>) =:= <<104,116,116,112,58,47,47,104,111,115,116,47,97,99,116,111,114,115,47,97,108,105,99,101>>\") :name)")
+
+;; actor_url_for preserves port in host
+(epoch 18)
+(eval "(get (erlang-eval-ast \"discovery:actor_url_for(<<98,111,98>>, <<104,58,57,57>>) =:= <<104,116,116,112,58,47,47,104,58,57,57,47,97,99,116,111,114,115,47,98,111,98>>\") :name)")
+
+;; webfinger_body starts with {"subject":"acct:<user>@<host>" — checked via http_server:match_prefix
+(epoch 19)
+(eval "(get (erlang-eval-ast \"B = discovery:webfinger_body(<<97,108,105,99,101>>, <<104,111,115,116>>, <<117,114,108>>), Pre = <<123,34,115,117,98,106,101,99,116,34,58,34,97,99,99,116,58,97,108,105,99,101,64,104,111,115,116,34>>, http_server:match_prefix(Pre, B) =/= nomatch\") :name)")
+
+;; webfinger_body byte_size is at least subject+links length (sanity)
+(epoch 20)
+(eval "(get (erlang-eval-ast \"B = discovery:webfinger_body(<<97,108,105,99,101>>, <<104,111,115,116>>, <<117,114,108>>), byte_size(B) > 80\") :name)")
+EPOCHS
+
+OUTPUT=$(timeout 480 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)   # one server run for all epochs; stdout captured, stderr discarded, 480 s cap
+
+check() {
+  local epoch="$1" desc="$2" expected="$3"
+  local actual
+  actual=$(echo "$OUTPUT" | awk -v e="$epoch" '
+    $0 ~ "^\\(ok-len " e " " { getline; print; exit }
+    $0 ~ "^\\(ok " e " "     { print; exit }
+    $0 ~ "^\\(error " e " "  { print; exit }
+  ')
+  [ -z "$actual" ] && actual=""
+  if echo "$actual" | grep -qF -- "$expected"; then
+    PASS=$((PASS+1))
+    [ "$VERBOSE" = "-v" ] && echo "  ok $desc"
+  else
+    FAIL=$((FAIL+1))
+    ERRORS+="  FAIL [$desc] (epoch $epoch) expected: $expected | actual: $actual
+"
+  fi
+}
+
+check  2  "discovery module loaded"          "discovery"
+check 10  "parse_acct prefixed"              "true"
+check 11  "parse_acct bare form"             "true"
+check 12  "parse_acct host with port"        "true"
+check 13  "parse_acct empty user -> error"   "true"
+check 14  "parse_acct missing @ -> error"    "true"
+check 15  "parse_acct empty host -> error"   "true"
+check 16  "parse_resource alias"             "true"
+check 17  "actor_url_for synthesises"        "true"
+check 18  "actor_url_for preserves port"     "true"
+check 19  "webfinger_body subject prefix"    "true"
+check 20  "webfinger_body has body bytes"    "true"
+
+TOTAL=$((PASS+FAIL))
+if [ $FAIL -eq 0 ]; then
+  echo "ok $PASS/$TOTAL next/tests/discovery.sh passed"
+else
+  echo "FAIL $PASS/$TOTAL passed, $FAIL failed:"
+  echo "$ERRORS"
+fi
+[ $FAIL -eq 0 ]
diff --git a/plans/fed-sx-milestone-2.md b/plans/fed-sx-milestone-2.md
index f19aa08f..4cd6ca22 100644
--- a/plans/fed-sx-milestone-2.md
+++ b/plans/fed-sx-milestone-2.md
@@ -654,13 +654,33 @@ Per §13.7: webfinger plus actor doc fetch.
 
 **Deliverables:**
 
-- `GET /.well-known/webfinger?resource=acct:alice@` returns the
-  actor URL.
-- `GET /actors/` returns the actor doc (already exists from
-  M1 Step 8c-actors).
-- Peer-actor cache: when verifying a peer's signature for the first
-  time, fetch their actor doc, store in `peer-actors` projection.
-- `discovery:resolve/1("acct:alice@host:port")` returns the actor URL.
+- [x] **10a** — Local-side discovery primitives in
+  `next/kernel/discovery.erl`:
+  - `parse_acct/1(<<"acct:user@host">>)` and
+    `parse_acct/1(<<"user@host">>)` (prefix optional) return
+    `{ok, User, Host}` or `{error, _}`. Reject empty user/host
+    and missing `@`. Host preserves an optional `:port` suffix.
+  - `parse_resource/1` is an alias for the webfinger query
+    parameter shape.
+  - `actor_url_for/2(User, Host)` synthesises
+    `http://<host>/actors/<user>` (TLS / https is v3, gated by
+    a TLS substrate Blocker).
+  - `webfinger_body/3(User, Host, ActorUrl)` builds the RFC 7033
+    JSON body with `:subject` + `:links[]` carrying
+    `rel: self / type: application/activity+json / href`.
+  Hand-rolled byte concatenation — no JSON BIF on this port.
+  `<<"...">>` string-literal segments truncate to one byte on
+  this port (briefing gotcha re-confirmed), so `"acct:"` is
+  spelled as `<<97,99,99,116,58>>`. 12/12 in `discovery.sh`.
+- [ ] **10b** — http_server route for
+  `GET /.well-known/webfinger?resource=acct:...`: parses the
+  query, looks up the actor via the kernel, returns 200 +
+  webfinger_body when known, 404 otherwise.
+- [ ] **10c** — Peer-actor fetch + cache write. Gates on
+  Blockers #2 (native `http-request` primitive missing).
+  Step 5's peer_actors cache already exposes the
+  `lookup_or_fetch` shape; this Step 10c plugs the discovery
+  HTTP fetch into the FetchFn slot.
 
 **Tests:**
 
@@ -950,6 +970,15 @@ proceed.
 
 Newest first.
 
+- **2026-06-07** — Step 10a: discovery primitives. New
+  `next/kernel/discovery.erl` parses acct: URIs
+  (prefix optional), synthesises `http://<host>/actors/<user>`,
+  and builds RFC 7033 webfinger JSON bodies. Hand-rolled byte
+  concatenation since this port has no JSON BIF and `<<"...">>`
+  string literals truncate to one byte (substrate gotcha). 12/12
+  in `discovery.sh`. The route wiring (10b) and peer-actor
+  fetch (10c) layer on top — 10c gates on Blockers #2.
+
 - **2026-06-07** — Step 8c: delivery-state projection. New
   `next/kernel/delivery_state.erl` folds enqueue / delivered /
   failed / dead_lettered events into a per-peer worker-shaped