Files
rose-ash/next/kernel/pipeline.erl
giles 89dd23c287
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 54s
fed-sx-types Phase 4: object-schema validation stage in pipeline
pipeline:apply_object_schema/2 (+ stage_object_schema/1 factory) — the
object-schema stage between activity-type validation and the kernel
append (plans/fed-sx-host-types.md step 4). When an inbound activity's
:object declares a refinement type ({type, TypeName}), resolve it
(Cfg type_index: TypeName -> TypeCid; then peer_types:lookup_or_fetch/2,
a local hit or a wire fetch) and apply the record's refinement schema
to the object's :field_values, rejecting on schema-fail with
{error, {validation_failed, object_schema}}.

The schema is either a 1-arity Erlang predicate (substrate stand-in,
locally stored) or a term_codec-safe {required, [Field,...]} constraint
(so a wire-fetched record validates too). Default
strict_object_schema = false: an unresolvable type is let through (the
skip is where a validation_skipped log belongs); strict rejects.
Objects with no declared type, and names absent from the local index,
are skipped (open-world).

Test: next/tests/object_schema.sh (15) — local hit, wire fetch, fetch
failure strict/non-strict, no peer_types, untyped object, undeclared
name, fun + data schema forms, no-schema record, stage composition.

No regression: pipeline_signature, pipeline_driver green. Plan doc
steps 1-4 marked done.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-30 15:50:45 +00:00

304 lines
11 KiB
Erlang

-module(pipeline).
-export([run_stages/2,
validate_inbound/1, validate_inbound/3,
validate_outbound/1,
inbound_stages/0, inbound_stages/2, outbound_stages/0,
stage_envelope/1,
stage_signature/1, stage_signature/2,
stage_replay/1, stage_replay/2,
stage_schema/1, stage_schema/2,
apply_object_schema/2, stage_object_schema/1]).
%% Validation pipeline per design §14.
%%
%% A stage is a 1-arity fun `(Activity) -> ok | {error, Reason}`.
%% The driver folds the activity through the stage list, halting
%% on the first error. The pure-functional driver itself takes a
%% stage list directly so tests can inject ad-hoc stage sequences
%% without depending on the bundled inbound/outbound lists.
%%
%% Inbound pipeline (full set per design §14): envelope, signature,
%% replay, audience, activity_schema, object_schema, content_validators,
%% capabilities, trust. Outbound is a subset (no replay, no trust;
%% auth handled at the HTTP layer).
%%
%% Sub-deliverable 6a introduced the driver and the bundled stage
%% lists; the concrete stages (6b-6c and later) are defined further
%% down in this module.
%% run_stages/2 — thread Activity through Stages in list order.
%% Evaluates to `ok` once every stage has answered `ok` (trivially so
%% for an empty list), or to the first `{error, Reason}` a stage
%% produces; stages after a failing one are never invoked.
run_stages(_Activity, []) ->
    ok;
run_stages(Activity, [Stage | Remaining]) ->
    case Stage(Activity) of
        ok -> run_stages(Activity, Remaining);
        {error, _Reason} = Failure -> Failure
    end.
%% validate_inbound/1 — run Activity through the static inbound stage
%% list (inbound_stages/0). Returns ok | {error, Reason}.
validate_inbound(Activity) ->
    Stages = inbound_stages(),
    run_stages(Activity, Stages).
%% validate_inbound/3 — federation inbound pipeline (Step 5b).
%%
%% Arguments:
%%   Activity        the signed envelope exactly as the peer sent it.
%%   PeerActorState  actor-state proplist of the sending peer; its
%%                   :public_keys are what signature verification
%%                   uses. Resolving it is the caller's job — in v2 it
%%                   comes either from a peer-actors cache (Step 5c)
%%                   or from a two-instance test fixture.
%%   InboxLog        :actor_inbox log state of the receiving actor;
%%                   stage_replay consults it to refuse a duplicate
%%                   :id.
%%
%% Stage order (design §13.2 + §14):
%%   1. stage_envelope            shape check
%%   2. stage_signature(PeerAS)   peer signature verification
%%   3. stage_replay(InboxLog)    replay defence against the receiving
%%                                actor's inbox
%%
%% Result is ok | {error, Reason}; the driver stops at the first
%% failing stage. Audience / schema / capabilities / trust stages are
%% deferred to v3.
validate_inbound(Activity, PeerActorState, InboxLog) ->
    Stages = inbound_stages(PeerActorState, InboxLog),
    run_stages(Activity, Stages).
%% validate_outbound/1 — run Activity through the outbound stage list
%% (outbound_stages/0). Returns ok | {error, Reason}.
validate_outbound(Activity) ->
    Stages = outbound_stages(),
    run_stages(Activity, Stages).
%% inbound_stages/0 — the static inbound stage list: only the envelope
%% shape check, wrapped as a 1-arity stage fun.
inbound_stages() ->
    EnvelopeStage = fun (Activity) -> stage_envelope(Activity) end,
    [EnvelopeStage].
%% inbound_stages/2 — ordered stage list for federation inbound:
%% envelope shape, then the peer's signature (checked against
%% PeerActorState), then replay detection against InboxLog.
inbound_stages(PeerActorState, InboxLog) ->
    EnvelopeStage = fun (Activity) -> stage_envelope(Activity) end,
    SignatureStage = stage_signature(PeerActorState),
    ReplayStage = stage_replay(InboxLog),
    [EnvelopeStage, SignatureStage, ReplayStage].
%% outbound_stages/0 — the outbound stage list: only the envelope
%% shape check, wrapped as a 1-arity stage fun.
outbound_stages() ->
    EnvelopeStage = fun (Activity) -> stage_envelope(Activity) end,
    [EnvelopeStage].
%% ── Concrete stages ─────────────────────────────────────────────
%% stage_envelope/1 — envelope shape stage. Delegates straight to
%% envelope:validate_shape/1, whose ok | {error, R} result is already
%% the shape the pipeline driver expects, so no translation is needed.
stage_envelope(Envelope) ->
    envelope:validate_shape(Envelope).
%% stage_signature/2 — signature check for callers (and tests) that
%% already hold the ActorState. Thin wrapper over
%% envelope:verify_signature/2 (Step 2c); its result is returned
%% unchanged.
stage_signature(Envelope, SignerState) ->
    envelope:verify_signature(Envelope, SignerState).
%% stage_signature/1 — stage factory. Captures ActorState and hands
%% back a 1-arity fun suitable for the pipeline driver, so signature
%% checking can be composed into a stage list at runtime. The static
%% `inbound_stages/0` list leaves it out precisely because no
%% ActorState exists when that list is built.
stage_signature(ActorState) ->
    Verify = fun (Envelope) ->
                 envelope:verify_signature(Envelope, ActorState)
             end,
    Verify.
%% stage_replay/2 — replay defence. Looks for an activity with the
%% same :id among the in-memory log's entries.
%%
%%   ok               the :id is not in the log yet
%%   {error, replay}  the log already carries that :id
%%   {error, no_id}   the activity has no :id field at all
%%
%% The lookup is a linear scan over the log entries; the projection
%% scheduler (Step 7) will eventually maintain a CID index that turns
%% it into O(1).
stage_replay(Activity, LogState) ->
    case envelope:get_field(id, Activity) of
        {ok, Id} ->
            AlreadyLogged = log_has_id(Id, log:entries(LogState)),
            case AlreadyLogged of
                false -> ok;
                true -> {error, replay}
            end;
        not_found ->
            {error, no_id}
    end.
%% stage_replay/1 — stage factory: closes over LogState and returns
%% the 1-arity replay check the pipeline driver can fold.
stage_replay(LogState) ->
    fun (Incoming) ->
        stage_replay(Incoming, LogState)
    end.
%% log_has_id/2 — true when some entry in the list has an :id field
%% equal to Id, false once the list is exhausted. Entries without an
%% :id (or with a different one) are skipped.
log_has_id(_Id, []) ->
    false;
log_has_id(Id, [Entry | Older]) ->
    envelope:get_field(id, Entry) =:= {ok, Id}
        orelse log_has_id(Id, Older).
%% stage_schema/2 — check the activity's :object against the schema
%% registered for the activity's :type.
%%
%% SchemaLookup is supplied by the caller: a fun
%% (Type) -> {ok, SchemaFn} | not_found, where SchemaFn is in turn a
%% fun (Object) -> bool.
%%
%% Results:
%%   ok                        the schema accepts the object
%%   {error, no_type}          the activity carries no :type
%%   {error, schema_mismatch}  SchemaFn returned false
%%
%% Open-world default: a Type with no registered schema yields ok, so
%% the pipeline does not block activities the kernel has not learned
%% about yet. Tightening to strict-world is planned for later in
%% milestone 2.
%%
%% An activity without an :object skips the schema check — some verbs
%% legitimately carry no object.
%%
%% The Erlang-fun schema is the substrate-friendly stand-in for the
%% SX-source :schema bodies stored in the genesis bundle. Once an
%% SX-source eval bridge exists, this same stage shape will dispatch
%% through it instead, with no API change.
stage_schema(Activity, SchemaLookup) ->
    case envelope:get_field(type, Activity) of
        {ok, Type} ->
            Registered = SchemaLookup(Type),
            case Registered of
                {ok, SchemaFn} -> check_object_schema(Activity, SchemaFn);
                not_found -> ok
            end;
        not_found ->
            {error, no_type}
    end.
%% check_object_schema/2 — run SchemaFn over the activity's :object.
%% No :object means nothing to check (ok); otherwise a `true` verdict
%% is ok and a `false` verdict is {error, schema_mismatch}.
check_object_schema(Activity, SchemaFn) ->
    case envelope:get_field(object, Activity) of
        {ok, Object} ->
            Verdict = SchemaFn(Object),
            case Verdict of
                false -> {error, schema_mismatch};
                true -> ok
            end;
        not_found ->
            ok
    end.
%% stage_schema/1 — stage factory: closes over SchemaLookup and
%% returns the 1-arity schema check the pipeline driver can fold.
stage_schema(SchemaLookup) ->
    fun (Incoming) ->
        stage_schema(Incoming, SchemaLookup)
    end.
%% ── host-type fed Step 4: object-schema validation stage ────────
%%
%% apply_object_schema/2 — when an inbound activity's :object declares
%% a refinement type ({type, TypeName} on the object), resolve that
%% type's record and apply its refinement schema to the object's
%% :field_values. Sits between activity-type (stage_schema) validation
%% and the kernel append; rejects the activity on schema-fail.
%%
%% Resolution mirrors the design note and runs in three steps, one
%% function each so no step nests inside another:
%%   1. object -> TypeName        (object_type_name/1)
%%   2. TypeName -> TypeCid       via Cfg's `type_index`
%%                                ([{TypeName, TypeCid}, ...], the
%%                                local Define-name index)
%%   3. TypeCid -> TypeRecord     via peer_types:lookup_or_fetch/2 (a
%%                                local cache hit, or a wire fetch
%%                                through the Cfg type_fetch_fn)
%%
%% Outcomes:
%%   object has no {type, _}          -> ok (no schema applies)
%%   TypeName not in type_index       -> ok (undeclared type;
%%                                       open-world default)
%%   record resolved, schema passes   -> ok
%%   record resolved, schema fails    -> {error, {validation_failed,
%%                                                object_schema}}
%%   record unresolvable (cache miss + -> strict_object_schema:
%%   fetch failure / no peer_types)       true  -> {error, ...}
%%                                        false -> ok (skipped)
%%
%% Default strict_object_schema = false: a node only blocks on an
%% unresolvable type when it opts into airtight validation via Cfg
%% {strict_object_schema, true}. The non-strict skip is where a
%% `validation_skipped` log entry belongs (left to the caller's logger
%% so this stage keeps the ok | {error, _} contract run_stages wants).
%%
%% A TypeRecord's refinement schema is either a 1-arity Erlang
%% predicate over the field-values (the substrate stand-in, for
%% locally-defined types) or a data constraint {required, [Field, ...]}
%% (term_codec-safe, so a wire-fetched TypeRecord can still validate).
apply_object_schema(Activity, Cfg) ->
    case object_type_name(Activity) of
        none -> ok;
        {ok, TypeName} -> apply_named_type_schema(TypeName, Activity, Cfg)
    end.

%% Step 2: map the declared TypeName to its TypeCid; a name missing
%% from the local index is let through.
apply_named_type_schema(TypeName, Activity, Cfg) ->
    case type_cid_for(TypeName, Cfg) of
        none -> ok;
        {ok, TypeCid} -> apply_resolved_type_schema(TypeCid, Activity, Cfg)
    end.

%% Step 3: fetch the TypeRecord and validate against it; an
%% unresolvable record defers to the strict_object_schema policy.
apply_resolved_type_schema(TypeCid, Activity, Cfg) ->
    case resolve_type_record(TypeCid, Cfg) of
        {ok, TR} -> check_object_against(Activity, TR);
        {error, _} -> on_unresolved_type(Cfg)
    end.
%% stage_object_schema/1 — stage factory: closes over Cfg and returns
%% the 1-arity object-schema check the pipeline driver can fold.
stage_object_schema(Cfg) ->
    fun (Incoming) ->
        apply_object_schema(Incoming, Cfg)
    end.
%% object_type_name/1 — {ok, TypeName} when the activity has a list
%% :object that carries a :type field; `none` in every other case
%% (no object, non-list object, or object without a type).
object_type_name(Activity) ->
    case envelope:get_field(object, Activity) of
        {ok, Object} when is_list(Object) ->
            declared_type(envelope:get_field(type, Object));
        _ ->
            none
    end.

%% Keep a successful lookup as-is; collapse anything else to `none`.
declared_type({ok, _TypeName} = Found) -> Found;
declared_type(_) -> none.
%% object_field_values/1 — the :field_values of the activity's list
%% :object, or [] when the object is missing, is not a list, or has
%% no :field_values.
object_field_values(Activity) ->
    case envelope:get_field(object, Activity) of
        {ok, Object} when is_list(Object) ->
            field_values_or_empty(envelope:get_field(field_values, Object));
        _ ->
            []
    end.

%% Unwrap a successful lookup; anything else means "no field values".
field_values_or_empty({ok, FieldValues}) -> FieldValues;
field_values_or_empty(_) -> [].
%% type_cid_for/2 — look TypeName up in Cfg's `type_index`. Returns
%% {ok, TypeCid} on a hit; `none` when Cfg has no type_index or the
%% index has no entry for TypeName.
type_cid_for(TypeName, Cfg) ->
    cid_in_index(TypeName, stage_field(type_index, Cfg)).

%% `nil` is stage_field's "key absent" answer, i.e. no index at all.
cid_in_index(_TypeName, nil) ->
    none;
cid_in_index(TypeName, Index) ->
    case find_keyed(TypeName, Index) of
        {ok, _Cid} = Hit -> Hit;
        _ -> none
    end.
%% resolve_type_record/2 — turn a TypeCid into its TypeRecord through
%% peer_types:lookup_or_fetch/2, whose result is returned unchanged.
%% Two conditions short-circuit before that call:
%%   {error, no_peer_types}    Cfg has no `peer_types` entry
%%   {error, peer_types_down}  nothing is registered under the name
%%                             `peer_types`
resolve_type_record(TypeCid, Cfg) ->
    case stage_field(peer_types, Cfg) of
        nil ->
            {error, no_peer_types};
        _Configured ->
            fetch_if_registered(erlang:whereis(peer_types), TypeCid, Cfg)
    end.

%% Only call into peer_types when whereis/1 found a registered process.
fetch_if_registered(undefined, _TypeCid, _Cfg) ->
    {error, peer_types_down};
fetch_if_registered(_Registered, TypeCid, Cfg) ->
    peer_types:lookup_or_fetch(TypeCid, Cfg).
%% on_unresolved_type/1 — policy for a type whose record could not be
%% resolved: reject only when Cfg sets strict_object_schema to exactly
%% `true`; any other value (or no entry) lets the activity through.
on_unresolved_type(Cfg) ->
    Strict = stage_field(strict_object_schema, Cfg),
    if
        Strict =:= true -> {error, {validation_failed, object_schema}};
        true -> ok
    end.
%% check_object_against/2 — apply the TypeRecord's refinement_schema
%% to the activity's object field-values. A record with no
%% refinement_schema entry accepts everything.
check_object_against(Activity, TR) ->
    Schema = stage_field(refinement_schema, TR),
    case Schema of
        nil ->
            ok;
        _ ->
            FieldValues = object_field_values(Activity),
            apply_refinement(Schema, FieldValues)
    end.
%% apply_refinement/2 — evaluate one refinement schema against the
%% field-values.
%%   1-arity fun           passes only when it returns exactly `true`
%%   {required, Fields}    passes when every listed field is present
%%   any other term        not a recognised schema form; accepted
%% A failing schema yields {error, {validation_failed, object_schema}}.
apply_refinement(Predicate, FieldValues) when is_function(Predicate, 1) ->
    refinement_verdict(Predicate(FieldValues) =:= true);
apply_refinement({required, Fields}, FieldValues) ->
    refinement_verdict(all_present(Fields, FieldValues));
apply_refinement(_Unrecognised, _FieldValues) ->
    ok.

%% Translate a boolean pass/fail into the stage result.
refinement_verdict(true) -> ok;
refinement_verdict(false) -> {error, {validation_failed, object_schema}}.
%% all_present/2 — true when every field named in the first list is a
%% key of the field-values; stops at the first missing one. An empty
%% requirement list is trivially satisfied.
all_present([], _FieldValues) ->
    true;
all_present([Field | More], FieldValues) ->
    has_key(Field, FieldValues) andalso all_present(More, FieldValues).
%% has_key/2 — true when the list contains a {K, _} pair, else false.
%%
%% The second argument is the object's :field_values, which comes from
%% the inbound activity and reaches this function without any shape
%% check. The final clause therefore answers false for anything that
%% is not a cons cell: the empty list, a non-list term, or the tail of
%% an improper list. Malformed field-values thus fail the
%% {required, ...} check and are rejected, instead of crashing the
%% validating process with function_clause.
has_key(K, [{K, _} | _]) -> true;
has_key(K, [_ | Rest]) -> has_key(K, Rest);
has_key(_, _) -> false.
%% stage_field/2 — value of the first {Key, Value} pair in the list,
%% or the atom `nil` when no pair has that key. Elements that are not
%% matching pairs are skipped.
stage_field(_Key, []) ->
    nil;
stage_field(Key, [Entry | Rest]) ->
    case Entry of
        {Key, Value} -> Value;
        _ -> stage_field(Key, Rest)
    end.
%% find_keyed/2 — {ok, Value} for the first {Key, Value} pair in the
%% list, {error, not_found} when no pair has that key. Elements that
%% are not matching pairs are skipped.
find_keyed(_Key, []) ->
    {error, not_found};
find_keyed(Key, [Entry | Rest]) ->
    case Entry of
        {Key, Value} -> {ok, Value};
        _ -> find_keyed(Key, Rest)
    end.