-module(pipeline).

-export([run_stages/2,
         validate_inbound/1, validate_inbound/3,
         validate_outbound/1,
         inbound_stages/0, inbound_stages/2,
         outbound_stages/0,
         stage_envelope/1,
         stage_signature/1, stage_signature/2,
         stage_replay/1, stage_replay/2,
         stage_schema/1, stage_schema/2,
         apply_object_schema/2, stage_object_schema/1,
         apply_triggers/3]).

%% Validation pipeline per design §14.
%%
%% A stage is a 1-arity fun `(Activity) -> ok | {error, Reason}`.
%% run_stages/2 threads one activity through a list of such stages
%% and stops at the first error. The driver takes the stage list as
%% an argument, so tests can inject ad-hoc stage sequences without
%% depending on the bundled inbound/outbound lists.
%%
%% Inbound pipeline (full set per design §14): envelope, signature,
%% replay, audience, activity_schema, object_schema, content_validators,
%% capabilities, trust. Outbound is a subset (no replay, no trust;
%% auth handled at the HTTP layer).
%%
%% Sub-deliverable 6a wired only the driver and the empty stage
%% lists; the concrete stages were planned for 6b-6c.

%% run_stages/2 — pipeline driver.
%%   Activity: handed unchanged to every stage.
%%   Stages:   list of 1-arity funs returning ok | {error, Reason}.
%% Returns ok once every stage has answered ok (an empty list is ok),
%% or the first {error, Reason} a stage produced; stages after the
%% failing one are never called.
run_stages(Activity, [Stage | Remaining]) ->
    case Stage(Activity) of
        ok -> run_stages(Activity, Remaining);
        {error, Reason} -> {error, Reason}
    end;
run_stages(_Activity, []) ->
    ok.

%% validate_inbound/1 — run Activity through the static inbound list
%% (inbound_stages/0).
validate_inbound(Activity) ->
    Stages = inbound_stages(),
    run_stages(Activity, Stages).

%% validate_inbound/3 — Step 5b federation inbound pipeline.
%%
%%   Activity:       the signed envelope as received from the peer.
%%   PeerActorState: the peer's actor-state proplist carrying
%%                   :public_keys for signature verification. The
%%                   caller resolves it — for v2 it is either
%%                   pre-populated from a peer-actors cache (Step 5c)
%%                   or known from a two-instance test fixture.
%%   InboxLog:       the receiving actor's :actor_inbox log state,
%%                   used by stage_replay to reject a duplicate :id.
%%
%% Stages, in order (per design §13.2 + §14):
%%   stage_envelope            — shape check
%%   stage_signature(PeerAS)   — peer signature verification
%%   stage_replay(InboxLog)    — replay defence against the receiving
%%                               actor's inbox
%%
%% Returns ok | {error, Reason}; the driver halts on the first
%% failure. Audience / schema / capabilities / trust stages are
%% deferred to v3.
validate_inbound(Activity, PeerActorState, InboxLog) ->
    Stages = inbound_stages(PeerActorState, InboxLog),
    run_stages(Activity, Stages).

%% validate_outbound/1 — run Activity through outbound_stages/0.
validate_outbound(Activity) ->
    Stages = outbound_stages(),
    run_stages(Activity, Stages).

%% inbound_stages/0 — the static inbound list: envelope shape only.
inbound_stages() ->
    [envelope_stage()].

%% inbound_stages/2 — the full ordered stage list for federation
%% inbound (envelope -> peer signature -> replay against the inbox).
inbound_stages(PeerActorState, InboxLog) ->
    SignatureStage = stage_signature(PeerActorState),
    ReplayStage = stage_replay(InboxLog),
    [envelope_stage(), SignatureStage, ReplayStage].

%% outbound_stages/0 — the outbound list: envelope shape only.
outbound_stages() ->
    [envelope_stage()].

%% envelope_stage/0 — the envelope check wrapped as a stage fun, shared
%% by all three stage lists above.
envelope_stage() ->
    fun (Activity) -> stage_envelope(Activity) end.

%% ── Concrete stages ─────────────────────────────────────────────

%% stage_envelope/1 — delegates to envelope:validate_shape/1, whose
%% ok | {error, R} result is already what the driver expects.
stage_envelope(Activity) ->
    envelope:validate_shape(Activity).

%% stage_signature/2 — direct (Activity, ActorState) check. Wraps
%% envelope:verify_signature/2 from Step 2c. Useful for tests and for
%% callers that already have ActorState in scope.
stage_signature(Activity, ActorState) ->
    envelope:verify_signature(Activity, ActorState).

%% stage_signature/1 — factory: closes over ActorState and returns a
%% 1-arity stage fun the driver can fold. This is how signature
%% checking is composed into a stage list at runtime (the static
%% `inbound_stages/0` list omits it precisely because ActorState is
%% not available when that list is built).
stage_signature(ActorState) ->
    fun (Activity) -> stage_signature(Activity, ActorState) end.

%% stage_replay/2 — looks in the in-memory log for an activity with
%% the same :id. Returns ok if the activity is new, `{error, replay}`
%% if the log already carries it, `{error, no_id}` if the activity has
%% no :id field. The check is a linear scan of the log entries; the
%% projection scheduler (Step 7) will eventually maintain a CID index
%% that turns this into O(1).
stage_replay(Activity, LogState) ->
    case envelope:get_field(id, Activity) of
        {ok, Id} -> replay_check(Id, LogState);
        not_found -> {error, no_id}
    end.

%% replay_check/2 — ok when Id is absent from the log's entries,
%% {error, replay} when it is already there.
replay_check(Id, LogState) ->
    Entries = log:entries(LogState),
    case log_has_id(Id, Entries) of
        false -> ok;
        true -> {error, replay}
    end.

%% stage_replay/1 — factory: closes over LogState and returns the
%% 1-arity stage fun.
stage_replay(LogState) ->
    fun (Activity) -> stage_replay(Activity, LogState) end.

%% log_has_id/2 — true when some entry's :id equals Wanted. Entries
%% whose :id is missing or different are skipped.
log_has_id(Wanted, [Entry | Remaining]) ->
    case envelope:get_field(id, Entry) of
        {ok, Wanted} -> true;
        _Other -> log_has_id(Wanted, Remaining)
    end;
log_has_id(_Wanted, []) ->
    false.

%% stage_schema/2 — validates the activity's :object against the
%% schema registered for its :type. SchemaLookup is a caller-supplied
%% fun (Type) -> {ok, SchemaFn} | not_found; SchemaFn is itself a
%% fun (Object) -> bool. Returns:
%%   ok                       when the schema accepts the object
%%   {error, no_type}         when the activity has no :type
%%   {error, schema_mismatch} when SchemaFn returned false
%%
%% Open-world default: an unregistered Type returns ok so the pipeline
%% doesn't block activities the kernel hasn't yet learned about.
%% Tightening to strict-world happens later in milestone 2.
%%
%% Activities with no :object skip the schema check (some verbs
%% legitimately carry no object).
%%
%% The Erlang-fun shape is the substrate-friendly stand-in for the
%% SX-source :schema bodies stored in the genesis bundle. Once an
%% SX-source eval bridge exists, the same stage shape will dispatch
%% through it instead — no API change.
stage_schema(Activity, SchemaLookup) ->
    case envelope:get_field(type, Activity) of
        {ok, Type} -> check_registered_schema(SchemaLookup(Type), Activity);
        not_found -> {error, no_type}
    end.

%% check_registered_schema/2 — acts on SchemaLookup's answer: no
%% registered schema passes, a registered one is applied to the object.
check_registered_schema(LookupResult, Activity) ->
    case LookupResult of
        {ok, SchemaFn} -> check_object_schema(Activity, SchemaFn);
        not_found -> ok
    end.
%% check_object_schema/2 — run SchemaFn over the activity's :object.
%% An activity without an :object passes; otherwise SchemaFn's
%% true/false answer becomes ok / {error, schema_mismatch}.
check_object_schema(Activity, SchemaFn) ->
    case envelope:get_field(object, Activity) of
        {ok, Obj} -> schema_verdict(SchemaFn(Obj));
        not_found -> ok
    end.

%% schema_verdict/1 — translate the predicate's boolean into the
%% stage result.
schema_verdict(Accepted) ->
    case Accepted of
        true -> ok;
        false -> {error, schema_mismatch}
    end.

%% stage_schema/1 — factory: closes over SchemaLookup and returns the
%% 1-arity stage fun.
stage_schema(SchemaLookup) ->
    fun (Activity) -> stage_schema(Activity, SchemaLookup) end.

%% ── host-type fed Step 4: object-schema validation stage ────────
%%
%% apply_object_schema/2 — when an inbound activity's :object declares
%% a refinement type ({type, TypeName} on the object), resolve that
%% type's record and apply its refinement schema to the object's
%% :field_values. Sits between activity-type (stage_schema) validation
%% and the kernel append; rejects the activity on schema-fail.
%%
%% Resolution mirrors the design note: TypeName -> TypeCid via Cfg's
%% `type_index` ([{TypeName, TypeCid}, ...], the local Define-name
%% index), then TypeCid -> TypeRecord via peer_types:lookup_or_fetch/2
%% (a local cache hit, or a wire fetch through the Cfg type_fetch_fn).
%%
%% Outcomes:
%%   object has no {type, _}          -> ok (no schema applies)
%%   TypeName not in type_index       -> ok (undeclared type;
%%                                       open-world default)
%%   record resolved, schema passes   -> ok
%%   record resolved, schema fails    -> {error, {validation_failed,
%%                                               object_schema}}
%%   record unresolvable (cache miss + -> strict_object_schema:
%%     fetch failure / no peer_types)     true  -> {error, ...}
%%                                        false -> ok (skipped)
%%
%% Default strict_object_schema = false: a node only blocks on an
%% unresolvable type when it opts into airtight validation via Cfg
%% {strict_object_schema, true}. The non-strict skip is where a
%% `validation_skipped` log entry belongs (left to the caller's logger
%% so this stage keeps the ok | {error, _} contract run_stages wants).
%%
%% A TypeRecord's refinement schema is either a 1-arity Erlang
%% predicate over the field-values (the substrate stand-in, for
%% locally-defined types) or a data constraint {required, [Field, ...]}
%% (term_codec-safe, so a wire-fetched TypeRecord can still validate).
apply_object_schema(Activity, Cfg) ->
    case object_type_name(Activity) of
        {ok, TypeName} -> validate_declared_type(TypeName, Activity, Cfg);
        none -> ok
    end.

%% validate_declared_type/3 — second resolution step: map the declared
%% TypeName to its TypeCid; a name missing from the index passes.
validate_declared_type(TypeName, Activity, Cfg) ->
    case type_cid_for(TypeName, Cfg) of
        {ok, TypeCid} -> validate_against_cid(TypeCid, Activity, Cfg);
        none -> ok
    end.

%% validate_against_cid/3 — third resolution step: fetch the type
%% record and check the object, or apply the strictness policy when
%% the record cannot be resolved.
validate_against_cid(TypeCid, Activity, Cfg) ->
    case resolve_type_record(TypeCid, Cfg) of
        {ok, TypeRecord} -> check_object_against(Activity, TypeRecord);
        {error, _} -> on_unresolved_type(Cfg)
    end.

%% stage_object_schema/1 — factory: closes over Cfg and returns the
%% 1-arity stage fun.
stage_object_schema(Cfg) ->
    fun (Activity) -> apply_object_schema(Activity, Cfg) end.

%% object_member/2 — {ok, Value} for Key inside the activity's
%% :object, or none when there is no :object, the :object is not a
%% list, or the key is missing from it.
object_member(Key, Activity) ->
    case envelope:get_field(object, Activity) of
        {ok, Obj} when is_list(Obj) ->
            case envelope:get_field(Key, Obj) of
                {ok, Value} -> {ok, Value};
                _ -> none
            end;
        _ ->
            none
    end.

%% object_type_name/1 — {ok, TypeName} declared on the object, or none.
object_type_name(Activity) ->
    object_member(type, Activity).

%% object_field_values/1 — the object's :field_values, defaulting to
%% the empty list when they cannot be read.
object_field_values(Activity) ->
    case object_member(field_values, Activity) of
        {ok, FieldValues} -> FieldValues;
        none -> []
    end.

%% type_cid_for/2 — {ok, TypeCid} for TypeName from Cfg's type_index,
%% or none when Cfg carries no index or the name is not in it.
type_cid_for(TypeName, Cfg) ->
    cid_from_index(stage_field(type_index, Cfg), TypeName).

cid_from_index(nil, _TypeName) ->
    none;
cid_from_index(Index, TypeName) ->
    case find_keyed(TypeName, Index) of
        {ok, Cid} -> {ok, Cid};
        _NotIndexed -> none
    end.

%% resolve_type_record/2 — gate on Cfg's peer_types entry and on the
%% peer_types name being registered before asking
%% peer_types:lookup_or_fetch/2 for the record.
resolve_type_record(TypeCid, Cfg) ->
    case stage_field(peer_types, Cfg) of
        nil -> {error, no_peer_types};
        _Configured -> fetch_if_registered(erlang:whereis(peer_types), TypeCid, Cfg)
    end.

fetch_if_registered(undefined, _TypeCid, _Cfg) ->
    {error, peer_types_down};
fetch_if_registered(_Registered, TypeCid, Cfg) ->
    peer_types:lookup_or_fetch(TypeCid, Cfg).

%% on_unresolved_type/1 — strictness policy for an unresolvable type:
%% only {strict_object_schema, true} in Cfg turns it into a failure.
on_unresolved_type(Cfg) ->
    unresolved_verdict(stage_field(strict_object_schema, Cfg)).

unresolved_verdict(true) -> {error, {validation_failed, object_schema}};
unresolved_verdict(_NotStrict) -> ok.

%% check_object_against/2 — apply the record's refinement_schema to the
%% object's field values; a record without one accepts the object.
check_object_against(Activity, TR) ->
    case stage_field(refinement_schema, TR) of
        nil ->
            ok;
        Schema ->
            FieldValues = object_field_values(Activity),
            apply_refinement(Schema, FieldValues)
    end.
%% apply_refinement/2 — apply a type record's refinement schema to an
%% object's field values.
%%   Schema is a 1-arity predicate  -> ok only when it returns true;
%%                                     any other answer is a failure.
%%   Schema is {required, Fields}   -> ok only when every listed field
%%                                     is a key of FieldValues.
%%   any other schema form          -> ok (unrecognised forms are not
%%                                     enforced).
%% Failures are reported as {error, {validation_failed, object_schema}}.
apply_refinement(Fn, FieldValues) when is_function(Fn, 1) ->
    case Fn(FieldValues) of
        true -> ok;
        _ -> {error, {validation_failed, object_schema}}
    end;
apply_refinement({required, Fields}, FieldValues) ->
    case all_present(Fields, FieldValues) of
        true -> ok;
        false -> {error, {validation_failed, object_schema}}
    end;
apply_refinement(_, _) ->
    ok.

%% all_present/2 — true when every field in the first list is a key of
%% the {Key, Value} list FV. An empty field list is trivially satisfied.
all_present([], _) ->
    true;
all_present([F | Rest], FV) ->
    has_key(F, FV) andalso all_present(Rest, FV).

%% has_key/2 — true when a {K, _} pair appears in the list.
%% The field values originate from an inbound activity, so the
%% container may be malformed (not a list, or an improper list). The
%% final clause treats such input as "key absent" so that validation
%% reports a schema failure instead of raising function_clause.
has_key(K, [{K, _} | _]) -> true;
has_key(K, [_ | Rest]) -> has_key(K, Rest);
has_key(_, _) -> false.

%% stage_field/2 — value of the first {K, V} pair in the list, or the
%% atom nil when no pair has that key.
stage_field(K, [{K, V} | _]) -> V;
stage_field(K, [_ | Rest]) -> stage_field(K, Rest);
stage_field(_, []) -> nil.

%% find_keyed/2 — {ok, V} for the first {K, V} pair in the list, or
%% {error, not_found}.
find_keyed(_, []) -> {error, not_found};
find_keyed(K, [{K, V} | _]) -> {ok, V};
find_keyed(K, [_ | Rest]) -> find_keyed(K, Rest).

%% ── fed-sx triggers Step 2: post-append fan-out ─────────────────
%%
%% apply_triggers/3 — fires the durable flows bound to an activity's
%% type AFTER it has been accepted and appended (rejected activities
%% never reach here, so a flow only runs for an activity that really
%% landed). For each spec the activity's type is bound to, the spec
%% must pass its guard/actor-scope, and its {ActivityCid, TriggerCid}
%% pair must not already have fired (federation can deliver the same
%% activity twice via different peers — dedup is keyed on that pair,
%% read from the receiving actor's :triggers_fired). Surviving specs are
%% dispatched via flow_dispatch:start (a native flow_store:start), which
%% never raises.
%%
%% Returns {ok, Results} where Results is one
%%   {ActivityCid, TriggerCid, {ok, FlowId} | {error, Reason}}
%% per spec actually dispatched (guard-passed, not a duplicate). The
%% kernel folds the {ActivityCid, TriggerCid} pairs into the actor's
%% :triggers_fired (dedup) and the audit triples into its projection.
%% No matching/ready registry yields {ok, []}.
%%
%% Cfg gates the fan-out on {trigger_registry, trigger_registry} (the
%% registered gen_server), mirroring the object-schema stage's
%% {peer_types, _} gate. apply_triggers must NOT be called inside a
%% `try` — flow_dispatch does gen_server:calls, and a blocking call
%% inside a try deadlocks this scheduler; the fan-out runs after append,
%% in its own step, so this is naturally satisfied.
apply_triggers(Activity, ActorState, Cfg) ->
    case trigger_registry_ready(Cfg) of
        true -> fan_out_triggers(Activity, ActorState, Cfg);
        false -> {ok, []}
    end.

%% fan_out_triggers/3 — the gated body of apply_triggers/3: look up the
%% specs bound to the activity's type, read the actor's fired pairs,
%% and walk the specs.
fan_out_triggers(Activity, ActorState, Cfg) ->
    Specs = trigger_registry:lookup(activity_type_of(Activity)),
    ActivityCid = trigger_activity_cid(Activity),
    AlreadyFired = field_or_default(triggers_fired, ActorState, []),
    fire_each(Specs, Activity, ActorState, ActivityCid, AlreadyFired, Cfg, []).

%% trigger_registry_ready/1 — true only when Cfg carries a
%% trigger_registry entry AND the trigger_registry name is registered.
trigger_registry_ready(Cfg) ->
    case stage_field(trigger_registry, Cfg) of
        nil -> false;
        _Configured -> is_registered_process(erlang:whereis(trigger_registry))
    end.

is_registered_process(undefined) -> false;
is_registered_process(_Registered) -> true.

%% fire_each/7 — walk the specs in order. A spec is dispatched when its
%% {ACid, TCid} pair is new and its guard passes; each dispatch adds an
%% {ACid, TCid, Outcome} triple. Returns {ok, Triples} in dispatch order.
fire_each([Spec | Pending], A, AS, ACid, Fired, Cfg, Acc) ->
    TCid = trigger_registry:spec_cid(Spec),
    case should_fire(Spec, {ACid, TCid}, A, AS, Fired, Acc) of
        true ->
            Outcome = dispatch_flow(Spec, A, AS, Cfg),
            fire_each(Pending, A, AS, ACid, Fired, Cfg,
                      [{ACid, TCid, Outcome} | Acc]);
        false ->
            fire_each(Pending, A, AS, ACid, Fired, Cfg, Acc)
    end;
fire_each([], _A, _AS, _ACid, _Fired, _Cfg, Acc) ->
    {ok, lists:reverse(Acc)}.

%% should_fire/6 — the dedup check runs first, so the guard is only
%% consulted for a pair that has not fired before (neither in the
%% actor's fired list nor earlier in this call).
should_fire(Spec, Pair, A, AS, Fired, Acc) ->
    Duplicate = pair_member(Pair, Fired) orelse acc_member(Pair, Acc),
    (not Duplicate) andalso flow_dispatch:guard_passes(Spec, A, AS).

%% dispatch_flow/4 — start the flow and keep only what the result
%% triple records: the flow id on success, the reason on failure.
dispatch_flow(Spec, A, AS, Cfg) ->
    case flow_dispatch:start(Spec, A, AS, Cfg) of
        {ok, FlowId, _Audit} -> {ok, FlowId};
        {error, Reason} -> {error, Reason}
    end.

%% activity_type_of/1 — the activity's :type, or undefined.
activity_type_of(Activity) ->
    field_or_default(type, Activity, undefined).

%% trigger_activity_cid/1 — the activity's :id, or undefined.
trigger_activity_cid(Activity) ->
    field_or_default(id, Activity, undefined).

%% field_or_default/3 — the value envelope:get_field/2 finds for Key,
%% or Default for any other answer.
field_or_default(Key, Proplist, Default) ->
    unwrap_or(envelope:get_field(Key, Proplist), Default).

unwrap_or({ok, Value}, _Default) -> Value;
unwrap_or(_NotFound, Default) -> Default.

%% pair_member/2 — {ACid, TCid} present in a [{ACid, TCid}] fired list.
pair_member(Pair, [Pair | _]) -> true;
pair_member(Pair, [_ | Tail]) -> pair_member(Pair, Tail);
pair_member(_Pair, []) -> false.

%% acc_member/2 — has this {ACid, TCid} pair already been dispatched
%% during the current call? Acc holds {ACid, TCid, Outcome} triples;
%% the outcome is ignored when comparing.
acc_member({ACid, TCid}, [{ACid, TCid, _Outcome} | _]) -> true;
acc_member(Pair, [_ | Tail]) -> acc_member(Pair, Tail);
acc_member(_Pair, []) -> false.