-module(pipeline).

-export([run_stages/2,
         validate_inbound/1, validate_inbound/3,
         validate_outbound/1,
         inbound_stages/0, inbound_stages/2,
         outbound_stages/0,
         stage_envelope/1,
         stage_signature/1, stage_signature/2,
         stage_replay/1, stage_replay/2,
         stage_schema/1, stage_schema/2,
         apply_object_schema/2,
         stage_object_schema/1]).

%% Validation pipeline per design §14.
%%
%% A stage is a 1-arity fun `(Activity) -> ok | {error, Reason}`.
%% The driver folds the activity through the stage list, halting
%% on the first error. The pure-functional driver itself takes a
%% stage list directly so tests can inject ad-hoc stage sequences
%% without depending on the bundled inbound/outbound lists.
%%
%% Inbound pipeline (full set per design §14): envelope, signature,
%% replay, audience, activity_schema, object_schema, content_validators,
%% capabilities, trust. Outbound is a subset (no replay, no trust;
%% auth handled at the HTTP layer).
%%
%% This sub-deliverable (6a) wires only the driver and the empty
%% stage lists. Concrete stages land in 6b-6c.
%%
%% NOTE(review): inbound_stages/0 and outbound_stages/0 each return a
%% list carrying stage_envelope, so the "empty stage lists" remark
%% above looks stale — confirm and update.

%% run_stages/2 — the pipeline driver.
%%
%% Applies each stage fun to Activity in list order. An `ok` result
%% moves on to the next stage; the first `{error, _}` result is
%% returned unchanged and the remaining stages are not run. An empty
%% stage list yields `ok`. Any other stage result is not matched and
%% raises case_clause.
run_stages(_Activity, []) ->
    ok;
run_stages(Activity, [Stage | Remaining]) ->
    case Stage(Activity) of
        ok -> run_stages(Activity, Remaining);
        {error, _} = Failure -> Failure
    end.

%% validate_inbound/1 — run Activity through the static inbound list
%% returned by inbound_stages/0.
validate_inbound(Activity) ->
    Stages = inbound_stages(),
    run_stages(Activity, Stages).

%% validate_inbound/3 — Step 5b federation inbound pipeline.
%%
%% Activity:       the signed envelope as received from the peer.
%% PeerActorState: the peer's actor-state proplist carrying
%%                 :public_keys for signature verification. Caller
%%                 resolves this — for v2 it's either pre-populated
%%                 from a peer-actors cache (Step 5c) or known from
%%                 a two-instance test fixture.
%% InboxLog:       the receiving actor's :actor_inbox log state.
%%                 Used by stage_replay to reject duplicate :id.
%%
%% Stages (per design §13.2 + §14):
%%   stage_envelope            — shape check
%%   stage_signature(PeerAS)   — peer sig verify
%%   stage_replay(InboxLog)    — replay defence against
%%                               receiving actor's inbox
%%
%% Returns ok | {error, Reason}. The driver halts on first failure.
%% Audience / schema / capabilities / trust stages defer to v3.
validate_inbound(Activity, PeerActorState, InboxLog) ->
    Stages = inbound_stages(PeerActorState, InboxLog),
    run_stages(Activity, Stages).

%% validate_outbound/1 — run Activity through the static outbound
%% list returned by outbound_stages/0.
validate_outbound(Activity) ->
    Stages = outbound_stages(),
    run_stages(Activity, Stages).

%% inbound_stages/0 — static inbound list: the envelope shape check
%% only.
inbound_stages() ->
    EnvelopeStage = fun (Activity) -> stage_envelope(Activity) end,
    [EnvelopeStage].

%% inbound_stages/2 — the full ordered stage list for federation
%% inbound (envelope -> peer sig -> replay against inbox).
inbound_stages(PeerActorState, InboxLog) ->
    EnvelopeStage = fun (Activity) -> stage_envelope(Activity) end,
    SignatureStage = stage_signature(PeerActorState),
    ReplayStage = stage_replay(InboxLog),
    [EnvelopeStage, SignatureStage, ReplayStage].

%% outbound_stages/0 — static outbound list: the envelope shape check
%% only.
outbound_stages() ->
    EnvelopeStage = fun (Activity) -> stage_envelope(Activity) end,
    [EnvelopeStage].

%% ── Concrete stages ─────────────────────────────────────────────

%% stage_envelope/1 — wrap envelope:validate_shape/1. The pipeline
%% driver expects ok | {error, R}; validate_shape returns exactly
%% that, so delegation is direct.
stage_envelope(Activity) ->
    envelope:validate_shape(Activity).

%% stage_signature/2 — direct (Activity, ActorState) check. Wraps
%% envelope:verify_signature/2 from Step 2c. Useful for tests and
%% for callers that already have ActorState in scope.
stage_signature(Activity, ActorState) ->
    envelope:verify_signature(Activity, ActorState).

%% stage_signature/1 — factory: takes the ActorState and returns a
%% 1-arity stage fun the pipeline driver can fold. This is how
%% signature checking gets composed into a stage list at runtime
%% (the static `inbound_stages/0` list omits it precisely because
%% ActorState isn't available at static-list build time).
stage_signature(ActorState) ->
    fun (Activity) -> stage_signature(Activity, ActorState) end.

%% stage_replay/2 — checks the in-memory log for an existing
%% activity with the same :id.
%% Returns ok if the activity is new,
%% `{error, replay}` if the log already carries it, `{error, no_id}`
%% if the activity has no :id field. The check is linear scan of
%% log entries; the projection scheduler (Step 7) will eventually
%% maintain a CID index that turns this into O(1).
stage_replay(Activity, LogState) ->
    case envelope:get_field(id, Activity) of
        not_found ->
            {error, no_id};
        {ok, Id} ->
            AlreadyLogged = log_has_id(Id, log:entries(LogState)),
            case AlreadyLogged of
                true -> {error, replay};
                false -> ok
            end
    end.

%% stage_replay/1 — factory: closes over LogState and returns the
%% 1-arity stage fun the pipeline driver folds.
stage_replay(LogState) ->
    fun (Activity) -> stage_replay(Activity, LogState) end.

%% log_has_id/2 — true when some entry in the list carries an :id
%% exactly equal to Id. Entries whose :id lookup yields anything else
%% (including no :id at all) are skipped.
log_has_id(_Id, []) ->
    false;
log_has_id(Id, [Entry | Older]) ->
    case envelope:get_field(id, Entry) of
        {ok, Found} when Found =:= Id -> true;
        _ -> log_has_id(Id, Older)
    end.

%% stage_schema/2 — validates the activity's :object against the
%% schema registered for its :type. SchemaLookup is a caller-
%% supplied fun (Type) -> {ok, SchemaFn} | not_found; SchemaFn is
%% itself a fun (Object) -> bool. Returns:
%%   ok                       when the schema accepts the object
%%   {error, no_type}         when the activity has no :type
%%   {error, schema_mismatch} when SchemaFn returned false
%%
%% Open-world default: an unregistered Type returns ok so the
%% pipeline doesn't block activities the kernel hasn't yet learned
%% about. Tightening to strict-world happens later in milestone 2.
%%
%% Activities with no :object skip the schema check (some verbs
%% legitimately carry no object).
%%
%% The Erlang-fun shape is the substrate-friendly stand-in for the
%% SX-source :schema bodies stored in the genesis bundle. Once an
%% SX-source eval bridge exists, the same stage shape will dispatch
%% through it instead — no API change.
stage_schema(Activity, SchemaLookup) ->
    case envelope:get_field(type, Activity) of
        not_found ->
            {error, no_type};
        {ok, Type} ->
            Registered = SchemaLookup(Type),
            case Registered of
                not_found -> ok;
                {ok, SchemaFn} -> check_object_schema(Activity, SchemaFn)
            end
    end.
%% check_object_schema/2 — apply SchemaFn to the activity's :object.
%% An activity without an :object passes; otherwise `true` from
%% SchemaFn maps to ok and `false` to {error, schema_mismatch}.
check_object_schema(Activity, SchemaFn) ->
    case envelope:get_field(object, Activity) of
        not_found ->
            ok;
        {ok, Obj} ->
            Accepted = SchemaFn(Obj),
            case Accepted of
                true -> ok;
                false -> {error, schema_mismatch}
            end
    end.

%% stage_schema/1 — factory: closes over SchemaLookup and returns the
%% 1-arity stage fun the pipeline driver folds.
stage_schema(SchemaLookup) ->
    fun (Activity) -> stage_schema(Activity, SchemaLookup) end.

%% ── host-type fed Step 4: object-schema validation stage ────────
%%
%% apply_object_schema/2 — when an inbound activity's :object declares
%% a refinement type ({type, TypeName} on the object), resolve that
%% type's record and apply its refinement schema to the object's
%% :field_values. Sits between activity-type (stage_schema) validation
%% and the kernel append; rejects the activity on schema-fail.
%%
%% Resolution mirrors the design note: TypeName -> TypeCid via Cfg's
%% `type_index` ([{TypeName, TypeCid}, ...], the local Define-name
%% index), then TypeCid -> TypeRecord via peer_types:lookup_or_fetch/2
%% (a local cache hit, or a wire fetch through the Cfg type_fetch_fn).
%%
%% Outcomes:
%%   object has no {type, _}            -> ok (no schema applies)
%%   TypeName not in type_index         -> ok (undeclared type;
%%                                         open-world default)
%%   record resolved, schema passes     -> ok
%%   record resolved, schema fails      -> {error, {validation_failed,
%%                                                  object_schema}}
%%   record unresolvable (cache miss +  -> strict_object_schema:
%%   fetch failure / no peer_types)        true  -> {error, ...}
%%                                         false -> ok (skipped)
%%
%% Default strict_object_schema = false: a node only blocks on an
%% unresolvable type when it opts into airtight validation via Cfg
%% {strict_object_schema, true}. The non-strict skip is where a
%% `validation_skipped` log entry belongs (left to the caller's logger
%% so this stage keeps the ok | {error, _} contract run_stages wants).
%%
%% A TypeRecord's refinement schema is either a 1-arity Erlang
%% predicate over the field-values (the substrate stand-in, for
%% locally-defined types) or a data constraint {required, [Field, ...]}
%% (term_codec-safe, so a wire-fetched TypeRecord can still validate).
apply_object_schema(Activity, Cfg) ->
    with_object_type(object_type_name(Activity), Activity, Cfg).

%% with_object_type/3 — no declared object type means no schema
%% applies; otherwise map the type name to its CID and continue.
with_object_type(none, _Activity, _Cfg) ->
    ok;
with_object_type({ok, TypeName}, Activity, Cfg) ->
    with_type_cid(type_cid_for(TypeName, Cfg), Activity, Cfg).

%% with_type_cid/3 — a name absent from the index passes; otherwise
%% resolve the record and validate, or apply the unresolved-type
%% policy when resolution reports an error.
with_type_cid(none, _Activity, _Cfg) ->
    ok;
with_type_cid({ok, TypeCid}, Activity, Cfg) ->
    case resolve_type_record(TypeCid, Cfg) of
        {ok, TypeRecord} -> check_object_against(Activity, TypeRecord);
        {error, _} -> on_unresolved_type(Cfg)
    end.

%% stage_object_schema/1 — factory: closes over Cfg and returns the
%% 1-arity stage fun the pipeline driver folds.
stage_object_schema(Cfg) ->
    fun (Activity) -> apply_object_schema(Activity, Cfg) end.

%% object_field/2 — look Key up inside the activity's :object, but
%% only when that object is a list; any other case reads as not_found.
object_field(Key, Activity) ->
    case envelope:get_field(object, Activity) of
        {ok, Obj} when is_list(Obj) -> envelope:get_field(Key, Obj);
        _ -> not_found
    end.

%% object_type_name/1 — {ok, TypeName} when the object carries a
%% :type, none otherwise.
object_type_name(Activity) ->
    case object_field(type, Activity) of
        {ok, TypeName} -> {ok, TypeName};
        _ -> none
    end.

%% object_field_values/1 — the object's :field_values, or [] when the
%% object or the field is absent.
object_field_values(Activity) ->
    case object_field(field_values, Activity) of
        {ok, FieldValues} -> FieldValues;
        _ -> []
    end.

%% type_cid_for/2 — map TypeName to {ok, TypeCid} through Cfg's
%% type_index; none when Cfg has no index or the name is not in it.
type_cid_for(TypeName, Cfg) ->
    cid_from_index(TypeName, stage_field(type_index, Cfg)).

cid_from_index(_TypeName, nil) ->
    none;
cid_from_index(TypeName, Index) ->
    case find_keyed(TypeName, Index) of
        {ok, _Cid} = Hit -> Hit;
        _ -> none
    end.

%% resolve_type_record/2 — fetch the TypeRecord for TypeCid.
%% Reports {error, no_peer_types} when Cfg has no peer_types entry and
%% {error, peer_types_down} when no process is registered under the
%% name peer_types; otherwise defers to peer_types:lookup_or_fetch/2.
resolve_type_record(TypeCid, Cfg) ->
    case stage_field(peer_types, Cfg) of
        nil ->
            {error, no_peer_types};
        _Configured ->
            fetch_if_running(erlang:whereis(peer_types), TypeCid, Cfg)
    end.

fetch_if_running(undefined, _TypeCid, _Cfg) ->
    {error, peer_types_down};
fetch_if_running(_Registered, TypeCid, Cfg) ->
    peer_types:lookup_or_fetch(TypeCid, Cfg).

%% on_unresolved_type/1 — policy for a type whose record could not be
%% resolved: reject only when Cfg sets strict_object_schema to true.
on_unresolved_type(Cfg) ->
    Strict = stage_field(strict_object_schema, Cfg),
    if
        Strict =:= true -> {error, {validation_failed, object_schema}};
        true -> ok
    end.

%% check_object_against/2 — apply the record's refinement_schema to
%% the object's field-values; a record without one passes.
check_object_against(Activity, TR) ->
    case stage_field(refinement_schema, TR) of
        nil ->
            ok;
        Schema ->
            FieldValues = object_field_values(Activity),
            apply_refinement(Schema, FieldValues)
    end.
%% apply_refinement/2 — run a refinement schema over an object's
%% field-values.
%%
%%   1-arity fun          -> ok when it returns true; any other result
%%                           is {error, {validation_failed, object_schema}}
%%   {required, Fields}   -> ok when every Field is a key of
%%                           FieldValues; otherwise the same error
%%   any other schema     -> ok (unrecognised shapes are not enforced)
%%
%% FieldValues originates from the inbound activity, so it is not
%% guaranteed to be a well-formed list of pairs; has_key/2 treats a
%% malformed value as lacking every key so a {required, _} schema
%% fails validation instead of crashing the stage.
apply_refinement(Fn, FieldValues) when is_function(Fn, 1) ->
    case Fn(FieldValues) of
        true -> ok;
        _ -> {error, {validation_failed, object_schema}}
    end;
apply_refinement({required, Fields}, FieldValues) ->
    case all_present(Fields, FieldValues) of
        true -> ok;
        false -> {error, {validation_failed, object_schema}}
    end;
apply_refinement(_, _) ->
    ok.

%% all_present/2 — true when every field name in the first list is a
%% key of FieldValues. Stops at the first missing field.
all_present([], _FieldValues) ->
    true;
all_present([Field | Rest], FieldValues) ->
    case has_key(Field, FieldValues) of
        true -> all_present(Rest, FieldValues);
        false -> false
    end.

%% has_key/2 — true when the list holds a {Key, _} pair (exact match
%% on Key). Elements that are not such a pair are skipped. The final
%% clause covers a non-list value or an improper list tail, which
%% previously raised function_clause: such input simply has no keys.
has_key(_Key, []) ->
    false;
has_key(Key, [{Key, _} | _]) ->
    true;
has_key(Key, [_ | Rest]) ->
    has_key(Key, Rest);
has_key(_Key, _Malformed) ->
    false.

%% stage_field/2 — value of the first {Key, Value} pair in the list,
%% or nil when there is none. A stored value of nil is therefore
%% indistinguishable from an absent key.
stage_field(Key, [{Key, Value} | _]) ->
    Value;
stage_field(Key, [_ | Rest]) ->
    stage_field(Key, Rest);
stage_field(_Key, []) ->
    nil.

%% find_keyed/2 — tagged lookup: {ok, Value} for the first
%% {Key, Value} pair in the list, {error, not_found} otherwise.
find_keyed(_Key, []) ->
    {error, not_found};
find_keyed(Key, [{Key, Value} | _]) ->
    {ok, Value};
find_keyed(Key, [_ | Rest]) ->
    find_keyed(Key, Rest).