Import core (art-dag) as core/

This commit is contained in:
giles
2026-02-24 23:09:39 +00:00
80 changed files with 25711 additions and 0 deletions

47
core/.gitignore vendored Normal file
View File

@@ -0,0 +1,47 @@
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Testing
.pytest_cache/
.coverage
htmlcov/
# IDE
.idea/
.vscode/
*.swp
*.swo
# Environment
.env
.venv
env/
venv/
# Local caches (may contain ActivityPub key material)
.cache/
# Test outputs
test_cache/
test_plan_output.json
analysis.json
plan.json
plan_with_analysis.json

110
core/README.md Normal file
View File

@@ -0,0 +1,110 @@
# artdag
Content-addressed DAG execution engine with ActivityPub ownership.
## Features
- **Content-addressed nodes**: `node_id = SHA3-256(type + config + inputs)` for automatic deduplication
- **Quantum-resistant hashing**: SHA-3 throughout for future-proof integrity
- **ActivityPub ownership**: Cryptographically signed ownership claims
- **Federated identity**: `@user@artdag.rose-ash.com` style identities
- **Pluggable executors**: Register custom node types
- **Built-in video primitives**: SOURCE, SEGMENT, RESIZE, TRANSFORM, SEQUENCE, MUX, BLEND
## Installation
```bash
pip install -e .
```
### Optional: External Effect Tools
Some effects can use external tools for better performance:
**Pixelsort** (glitch art pixel sorting):
```bash
# Rust CLI (recommended - fast)
cargo install --git https://github.com/Void-ux/pixelsort.git pixelsort
# Or Python CLI
pip install git+https://github.com/Blotz/pixelsort-cli
```
**Datamosh** (video glitch/corruption):
```bash
# FFglitch (recommended)
./scripts/install-ffglitch.sh
# Or Python CLI
pip install git+https://github.com/tiberiuiancu/datamoshing
```
Check available tools:
```bash
python -m artdag.sexp.external_tools
```
## Quick Start
```python
from artdag import Engine, DAGBuilder, Registry
from artdag.activitypub import OwnershipManager
# Create ownership manager
manager = OwnershipManager("./my_registry")
# Create your identity
actor = manager.create_actor("alice", "Alice")
print(f"Created: {actor.handle}") # @alice@artdag.rose-ash.com
# Register an asset with ownership (cid = SHA3-256 hash of the file content)
asset, activity = manager.register_asset(
    actor=actor,
    name="my_image",
    cid="<sha3-256-of-file>",
    local_path="/path/to/image.jpg",
    tags=["photo", "art"],
)
print(f"Owned: {asset.name} (hash: {asset.cid})")
# Build and execute a DAG
engine = Engine("./cache")
builder = DAGBuilder()
source = builder.source(str(asset.local_path))
resized = builder.resize(source, width=1920, height=1080)
builder.set_output(resized)
result = engine.execute(builder.build())
print(f"Output: {result.output_path}")
```
## Architecture
```
artdag/
├── dag.py # Node, DAG, DAGBuilder
├── cache.py # Content-addressed file cache
├── executor.py # Base executor + registry
├── engine.py # DAG execution engine
├── activitypub/ # Identity + ownership
│ ├── actor.py # Actor identity with RSA keys
│ ├── activity.py # Create, Announce activities
│ ├── signatures.py # RSA signing/verification
│ └── ownership.py # Links actors to assets
├── nodes/ # Built-in executors
│ ├── source.py # SOURCE
│ ├── transform.py # SEGMENT, RESIZE, TRANSFORM
│ ├── compose.py # SEQUENCE, LAYER, MUX, BLEND
│ └── effect.py # EFFECT (identity, etc.)
└── effects/ # Effect implementations
└── identity/ # The foundational identity effect
```
## Related Repos
- **Registry**: https://git.rose-ash.com/art-dag/registry - Asset registry with ownership proofs
- **Recipes**: https://git.rose-ash.com/art-dag/recipes - DAG recipes using effects
## License
MIT

61
core/artdag/__init__.py Normal file
View File

@@ -0,0 +1,61 @@
# artdag - Content-addressed DAG execution engine with ActivityPub ownership
#
# A standalone execution engine that processes directed acyclic graphs (DAGs)
# where each node represents an operation. Nodes are content-addressed for
# automatic caching and deduplication.
#
# Core concepts:
# - Node: An operation with type, config, and inputs
# - DAG: A graph of nodes with a designated output node
# - Executor: Implements the actual operation for a node type
# - Engine: Executes DAGs by resolving dependencies and running executors
from .dag import Node, DAG, DAGBuilder, NodeType
from .cache import Cache, CacheEntry
from .executor import Executor, register_executor, get_executor
from .engine import Engine
from .registry import Registry, Asset
from .activities import Activity, ActivityStore, ActivityManager, make_is_shared_fn
# Analysis and planning modules (optional, require extra dependencies).
# On ImportError the names are re-exported as None so that
# "from artdag import Analyzer" keeps working; callers must check for None
# before use.
try:
    from .analysis import Analyzer, AnalysisResult
except ImportError:
    Analyzer = None
    AnalysisResult = None
try:
    from .planning import RecipePlanner, ExecutionPlan, ExecutionStep
except ImportError:
    RecipePlanner = None
    ExecutionPlan = None
    ExecutionStep = None

# Explicit public API of the package.
__all__ = [
    # Core
    "Node",
    "DAG",
    "DAGBuilder",
    "NodeType",
    "Cache",
    "CacheEntry",
    "Executor",
    "register_executor",
    "get_executor",
    "Engine",
    "Registry",
    "Asset",
    "Activity",
    "ActivityStore",
    "ActivityManager",
    "make_is_shared_fn",
    # Analysis (optional)
    "Analyzer",
    "AnalysisResult",
    # Planning (optional)
    "RecipePlanner",
    "ExecutionPlan",
    "ExecutionStep",
]

__version__ = "0.1.0"

371
core/artdag/activities.py Normal file
View File

@@ -0,0 +1,371 @@
# artdag/activities.py
"""
Persistent activity (job) tracking for cache management.
Activities represent executions of DAGs. They track:
- Input node IDs (sources)
- Output node ID (terminal node)
- Intermediate node IDs (everything in between)
This enables deletion rules:
- Shared items (ActivityPub published) cannot be deleted
- Inputs/outputs of activities cannot be deleted
- Intermediates can be deleted (reconstructible)
- Activities can only be discarded if no items are shared
"""
import json
import logging
import time
import uuid
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Set
from .cache import Cache, CacheEntry
from .dag import DAG
logger = logging.getLogger(__name__)
def make_is_shared_fn(activitypub_store: "ActivityStore") -> Callable[[str], bool]:
    """
    Build an is_shared predicate backed by an ActivityPub ActivityStore.

    Args:
        activitypub_store: The ActivityPub activity store
            (from artdag.activitypub.activity)

    Returns:
        Function that reports whether a cid has been published
        (i.e. appears in at least one "Create" activity).
    """
    def is_shared(cid: str) -> bool:
        for recorded in activitypub_store.find_by_object_hash(cid):
            if recorded.activity_type == "Create":
                return True
        return False

    return is_shared
@dataclass
class Activity:
    """
    A recorded execution of a DAG.

    Tracks which cache entries are inputs, outputs, and intermediates
    to enforce deletion rules.
    """
    activity_id: str                 # Unique ID (uuid4 string by default)
    input_ids: List[str]             # Source node cache IDs (sorted)
    output_id: str                   # Terminal node cache ID
    intermediate_ids: List[str]      # Everything in between (sorted)
    created_at: float                # Unix timestamp of recording
    status: str = "completed"        # pending|running|completed|failed
    dag_snapshot: Optional[Dict[str, Any]] = None  # Serialized DAG for reconstruction

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict for storage."""
        return {
            "activity_id": self.activity_id,
            "input_ids": self.input_ids,
            "output_id": self.output_id,
            "intermediate_ids": self.intermediate_ids,
            "created_at": self.created_at,
            "status": self.status,
            "dag_snapshot": self.dag_snapshot,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Activity":
        """Deserialize from storage; status/dag_snapshot are optional."""
        return cls(
            activity_id=data["activity_id"],
            input_ids=data["input_ids"],
            output_id=data["output_id"],
            intermediate_ids=data["intermediate_ids"],
            created_at=data["created_at"],
            status=data.get("status", "completed"),
            dag_snapshot=data.get("dag_snapshot"),
        )

    @classmethod
    def from_dag(cls, dag: "DAG", activity_id: Optional[str] = None) -> "Activity":
        """
        Create an Activity from a DAG.

        Classifies nodes as inputs (no upstream edges), output (the DAG's
        terminal node), or intermediates (everything else).

        Args:
            dag: The executed DAG to record.
            activity_id: Optional explicit ID; a fresh uuid4 is generated
                when omitted.  (Fix: annotation was `str = None`, an
                implicit Optional.)
        """
        if activity_id is None:
            activity_id = str(uuid.uuid4())
        # Inputs are source nodes: nodes with no upstream dependencies.
        input_ids = [nid for nid, node in dag.nodes.items() if not node.inputs]
        # Output is the designated terminal node.
        output_id = dag.output_id
        # Intermediates are every remaining node.
        intermediate_ids = [
            nid for nid in dag.nodes
            if nid not in input_ids and nid != output_id
        ]
        return cls(
            activity_id=activity_id,
            input_ids=sorted(input_ids),
            output_id=output_id,
            intermediate_ids=sorted(intermediate_ids),
            created_at=time.time(),
            status="completed",
            dag_snapshot=dag.to_dict(),
        )

    @property
    def all_node_ids(self) -> List[str]:
        """All node IDs involved in this activity (inputs + output + intermediates)."""
        return self.input_ids + [self.output_id] + self.intermediate_ids
class ActivityStore:
    """
    Persistent storage for activities.

    Keeps an in-memory index keyed by activity_id, mirrored to a single
    activities.json file under store_dir.
    """

    def __init__(self, store_dir: Path | str):
        self.store_dir = Path(store_dir)
        self.store_dir.mkdir(parents=True, exist_ok=True)
        self._activities: Dict[str, Activity] = {}
        self._load()

    def _index_path(self) -> Path:
        return self.store_dir / "activities.json"

    def _load(self):
        """Populate the in-memory index from activities.json, if present."""
        path = self._index_path()
        if not path.exists():
            return
        try:
            raw = json.loads(path.read_text())
            loaded: Dict[str, Activity] = {}
            for record in raw.get("activities", []):
                loaded[record["activity_id"]] = Activity.from_dict(record)
            self._activities = loaded
        except (json.JSONDecodeError, KeyError) as exc:
            # A corrupt index is not fatal: log it and start empty.
            logger.warning(f"Failed to load activities: {exc}")
            self._activities = {}

    def _save(self):
        """Rewrite the full index to disk (small file; rewritten wholesale)."""
        payload = {
            "version": "1.0",
            "activities": [item.to_dict() for item in self._activities.values()],
        }
        with open(self._index_path(), "w") as handle:
            json.dump(payload, handle, indent=2)

    def add(self, activity: Activity) -> None:
        """Insert (or replace) an activity and persist immediately."""
        self._activities[activity.activity_id] = activity
        self._save()

    def get(self, activity_id: str) -> Optional[Activity]:
        """Look up an activity by ID, or None if absent."""
        return self._activities.get(activity_id)

    def remove(self, activity_id: str) -> bool:
        """Drop an activity record (cache entries are untouched)."""
        if self._activities.pop(activity_id, None) is None:
            return False
        self._save()
        return True

    def list(self) -> List[Activity]:
        """Snapshot list of all stored activities."""
        return list(self._activities.values())

    def find_by_input_ids(self, input_ids: List[str]) -> List[Activity]:
        """Activities whose input set matches input_ids (order-insensitive)."""
        target = sorted(input_ids)
        return [
            act for act in self._activities.values()
            if sorted(act.input_ids) == target
        ]

    def find_using_node(self, node_id: str) -> List[Activity]:
        """All activities that reference node_id anywhere (input/output/intermediate)."""
        return [
            act for act in self._activities.values()
            if node_id in act.all_node_ids
        ]

    def __len__(self) -> int:
        return len(self._activities)
class ActivityManager:
    """
    Manages activities and cache deletion with sharing rules.

    Deletion rules:
    1. Shared items (ActivityPub published) cannot be deleted
    2. Inputs/outputs of activities cannot be deleted
    3. Intermediates can be deleted (reconstructible)
    4. Activities can only be discarded if no items are shared
    """

    def __init__(
        self,
        cache: Cache,
        activity_store: ActivityStore,
        is_shared_fn: Callable[[str], bool],
    ):
        """
        Args:
            cache: The L1 cache
            activity_store: Activity persistence
            is_shared_fn: Function that checks if a cid is shared
                (published via ActivityPub)
        """
        self.cache = cache
        self.activities = activity_store
        self._is_shared = is_shared_fn

    def record_activity(self, dag: DAG) -> Activity:
        """Record a completed DAG execution as an activity and persist it."""
        activity = Activity.from_dag(dag)
        self.activities.add(activity)
        return activity

    def is_shared(self, node_id: str) -> bool:
        """Check if a cache entry is shared (published via ActivityPub)."""
        # Sharing is keyed on the content hash (cid): an entry that is
        # missing or has no cid can never be shared.
        entry = self.cache.get_entry(node_id)
        if not entry or not entry.cid:
            return False
        return self._is_shared(entry.cid)

    def can_delete_cache_entry(self, node_id: str) -> bool:
        """
        Check if a cache entry can be deleted.

        Returns False if:
        - Entry is shared (ActivityPub published)
        - Entry is an input or output of any activity
        """
        # Rule 1: shared entries are never deletable.
        if self.is_shared(node_id):
            return False
        # Rule 2: inputs and outputs of any recorded activity are protected.
        for activity in self.activities.list():
            if node_id in activity.input_ids:
                return False
            if node_id == activity.output_id:
                return False
        # It's either an intermediate or orphaned - can delete
        return True

    def can_discard_activity(self, activity_id: str) -> bool:
        """
        Check if an activity can be discarded.

        Returns False if any cache entry (input, output, or intermediate)
        is shared via ActivityPub.
        """
        activity = self.activities.get(activity_id)
        if not activity:
            # Unknown IDs are reported as not discardable rather than raising.
            return False
        # Rule 4: a single shared item anywhere blocks the whole discard.
        for node_id in activity.all_node_ids:
            if self.is_shared(node_id):
                return False
        return True

    def discard_activity(self, activity_id: str) -> bool:
        """
        Discard an activity and delete its intermediate cache entries.

        Returns False if the activity cannot be discarded (has shared items).

        When discarded:
        - Intermediate cache entries are deleted
        - The activity record is removed
        - Inputs remain (may be used by other activities)
        - Output is deleted if orphaned (not shared, not used elsewhere)
        """
        if not self.can_discard_activity(activity_id):
            return False
        activity = self.activities.get(activity_id)
        if not activity:
            return False
        output_id = activity.output_id
        intermediate_ids = list(activity.intermediate_ids)
        # Remove the activity record FIRST so the _is_orphaned checks below
        # do not see this activity as still referencing its own nodes.
        self.activities.remove(activity_id)
        # Intermediates are always deletable once the record is gone.
        for node_id in intermediate_ids:
            self.cache.remove(node_id)
            logger.debug(f"Deleted intermediate: {node_id}")
        # Delete the output only if nothing else references it and it was
        # never published.
        if self._is_orphaned(output_id) and not self.is_shared(output_id):
            self.cache.remove(output_id)
            logger.debug(f"Deleted orphaned output: {output_id}")
        # Inputs normally remain - they may be used by other activities -
        # but any that are now orphaned and unshared are reclaimed too.
        for input_id in activity.input_ids:
            if self._is_orphaned(input_id) and not self.is_shared(input_id):
                self.cache.remove(input_id)
                logger.debug(f"Deleted orphaned input: {input_id}")
        return True

    def _is_orphaned(self, node_id: str) -> bool:
        """Check if a node is not referenced by any recorded activity."""
        for activity in self.activities.list():
            if node_id in activity.all_node_ids:
                return False
        return True

    def get_deletable_entries(self) -> List[CacheEntry]:
        """Get all cache entries that can be deleted under the rules above."""
        deletable = []
        for entry in self.cache.list_entries():
            if self.can_delete_cache_entry(entry.node_id):
                deletable.append(entry)
        return deletable

    def get_discardable_activities(self) -> List[Activity]:
        """Get all activities that can be discarded (no shared items)."""
        return [
            a for a in self.activities.list()
            if self.can_discard_activity(a.activity_id)
        ]

    def cleanup_intermediates(self) -> int:
        """
        Delete all intermediate cache entries.

        Intermediates are safe to delete as they can be reconstructed
        from inputs using the DAG.

        Returns:
            Number of entries deleted
        """
        deleted = 0
        for activity in self.activities.list():
            for node_id in activity.intermediate_ids:
                if self.cache.has(node_id):
                    self.cache.remove(node_id)
                    deleted += 1
        return deleted

View File

@@ -0,0 +1,33 @@
# primitive/activitypub/__init__.py
"""
ActivityPub implementation for Art DAG.
Provides decentralized identity and ownership for assets.
Domain: artdag.rose-ash.com
Core concepts:
- Actor: A user identity with cryptographic keys
- Object: An asset (image, video, etc.)
- Activity: An action (Create, Announce, Like, etc.)
- Signature: Cryptographic proof of authorship
"""
from .actor import Actor, ActorStore
from .activity import Activity, CreateActivity, ActivityStore
from .signatures import sign_activity, verify_signature, verify_activity_ownership
from .ownership import OwnershipManager, OwnershipRecord
# Public API re-exported at the activitypub package level.
__all__ = [
    "Actor",
    "ActorStore",
    "Activity",
    "CreateActivity",
    "ActivityStore",
    "sign_activity",
    "verify_signature",
    "verify_activity_ownership",
    "OwnershipManager",
    "OwnershipRecord",
]

# Canonical fediverse domain for this deployment; actor IDs, handles,
# and object URLs are minted under this host.
DOMAIN = "artdag.rose-ash.com"

View File

@@ -0,0 +1,203 @@
# primitive/activitypub/activity.py
"""
ActivityPub Activity types.
Activities represent actions taken by actors on objects.
Key activity types for Art DAG:
- Create: Actor creates/claims ownership of an object
- Announce: Actor shares/boosts an object
- Like: Actor endorses an object
"""
import json
import time
import uuid
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
from .actor import Actor, DOMAIN
def _generate_id() -> str:
    """Return a fresh random UUID4 string to use as an activity ID."""
    return f"{uuid.uuid4()}"
@dataclass
class Activity:
    """
    Base ActivityPub Activity.

    Attributes:
        activity_id: Unique identifier
        activity_type: Type (Create, Announce, Like, etc.)
        actor_id: ID of the actor performing the activity
        object_data: The object of the activity
        published: ISO timestamp (UTC, defaults to "now")
        signature: Cryptographic signature (added after signing)
    """
    activity_id: str
    activity_type: str
    actor_id: str
    object_data: Dict[str, Any]
    published: str = field(default_factory=lambda: time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()))
    signature: Optional[Dict[str, Any]] = None

    def to_activitypub(self) -> Dict[str, Any]:
        """Return ActivityPub JSON-LD representation."""
        document = {
            "@context": "https://www.w3.org/ns/activitystreams",
            "type": self.activity_type,
            "id": f"https://{DOMAIN}/activities/{self.activity_id}",
            "actor": self.actor_id,
            "object": self.object_data,
            "published": self.published,
        }
        # Truthiness check mirrors "signed" state: unsigned activities
        # carry no signature key at all.
        if self.signature:
            document["signature"] = self.signature
        return document

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for storage (flat field dump)."""
        keys = (
            "activity_id",
            "activity_type",
            "actor_id",
            "object_data",
            "published",
            "signature",
        )
        return {key: getattr(self, key) for key in keys}

    @classmethod
    def from_dict(cls, payload: Dict[str, Any]) -> "Activity":
        """Deserialize from storage; published/signature are optional."""
        return cls(
            activity_id=payload["activity_id"],
            activity_type=payload["activity_type"],
            actor_id=payload["actor_id"],
            object_data=payload["object_data"],
            published=payload.get("published", ""),
            signature=payload.get("signature"),
        )
@dataclass
class CreateActivity(Activity):
    """
    Create activity - establishes ownership of an object.

    Used when an actor creates or claims an asset.
    """
    # Pinned type; init=False keeps it out of __init__ so the inherited
    # non-default fields (actor_id, object_data) still order correctly.
    activity_type: str = field(default="Create", init=False)

    @classmethod
    def for_asset(
        cls,
        actor: "Actor",
        asset_name: str,
        cid: str,
        asset_type: str = "Image",
        metadata: Optional[Dict[str, Any]] = None,
    ) -> "CreateActivity":
        """
        Create a Create activity for an asset.

        Args:
            actor: The actor claiming ownership
            asset_name: Name of the asset
            cid: SHA-3 hash of the asset content
            asset_type: ActivityPub object type (Image, Video, Audio, etc.)
            metadata: Additional metadata (fix: annotation was the implicit
                Optional `Dict[str, Any] = None`)

        Returns:
            CreateActivity establishing ownership (unsigned; sign separately)
        """
        object_data = {
            "type": asset_type,
            "name": asset_name,
            "id": f"https://{DOMAIN}/objects/{cid}",
            "contentHash": {
                "algorithm": "sha3-256",
                "value": cid,
            },
            "attributedTo": actor.id,
        }
        # Only attach a metadata key when there is actually metadata.
        if metadata:
            object_data["metadata"] = metadata
        return cls(
            activity_id=_generate_id(),
            actor_id=actor.id,
            object_data=object_data,
        )
class ActivityStore:
    """
    Persistent storage for activities.

    Activities are stored as an append-only log for auditability.
    """

    def __init__(self, store_dir: Path | str):
        self.store_dir = Path(store_dir)
        self.store_dir.mkdir(parents=True, exist_ok=True)
        self._activities: List[Activity] = []
        self._load()

    def _log_path(self) -> Path:
        return self.store_dir / "activities.json"

    def _load(self):
        """Load the activity log from disk, if one exists."""
        path = self._log_path()
        if not path.exists():
            return
        data = json.loads(path.read_text())
        self._activities = [
            Activity.from_dict(item) for item in data.get("activities", [])
        ]

    def _save(self):
        """Rewrite the full log to disk."""
        payload = {
            "version": "1.0",
            "activities": [act.to_dict() for act in self._activities],
        }
        with open(self._log_path(), "w") as handle:
            json.dump(payload, handle, indent=2)

    def add(self, activity: Activity) -> None:
        """Append an activity to the log and persist."""
        self._activities.append(activity)
        self._save()

    def get(self, activity_id: str) -> Optional[Activity]:
        """Get an activity by ID, or None if absent."""
        return next(
            (act for act in self._activities if act.activity_id == activity_id),
            None,
        )

    def list(self) -> List[Activity]:
        """List all activities (copy of the log)."""
        return list(self._activities)

    def find_by_actor(self, actor_id: str) -> List[Activity]:
        """Find activities performed by the given actor."""
        return [act for act in self._activities if act.actor_id == actor_id]

    def find_by_object_hash(self, cid: str) -> List[Activity]:
        """Find activities referencing an object by content hash."""
        matches = []
        for act in self._activities:
            stored = act.object_data.get("contentHash", {})
            if isinstance(stored, dict):
                # Structured {"algorithm": ..., "value": ...} form.
                if stored.get("value") == cid:
                    matches.append(act)
            elif stored == cid:
                # Bare-string hash form.
                matches.append(act)
        return matches

    def __len__(self) -> int:
        return len(self._activities)

View File

@@ -0,0 +1,206 @@
# primitive/activitypub/actor.py
"""
ActivityPub Actor management.
An Actor is an identity with:
- Username and display name
- RSA key pair for signing
- ActivityPub-compliant JSON-LD representation
"""
import json
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Optional
from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import rsa, padding
DOMAIN = "artdag.rose-ash.com"
def _generate_keypair() -> tuple[bytes, bytes]:
    """Generate an RSA-2048 signing key pair as (private PEM, public PEM).

    Private key is serialized as unencrypted PKCS8; public key as
    SubjectPublicKeyInfo.
    """
    key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
    private_pem = key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=serialization.NoEncryption(),
    )
    public_pem = key.public_key().public_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PublicFormat.SubjectPublicKeyInfo,
    )
    return private_pem, public_pem
@dataclass
class Actor:
    """
    An ActivityPub Actor (identity).

    Attributes:
        username: Unique username (e.g., "giles")
        display_name: Human-readable name
        public_key: PEM-encoded public key
        private_key: PEM-encoded private key (kept secret)
        created_at: Timestamp of creation
    """
    username: str
    display_name: str
    public_key: bytes
    private_key: bytes
    created_at: float = field(default_factory=time.time)
    domain: str = DOMAIN

    @property
    def id(self) -> str:
        """ActivityPub actor ID (URL)."""
        return "https://" + self.domain + "/users/" + self.username

    @property
    def handle(self) -> str:
        """Fediverse handle, e.g. @giles@artdag.rose-ash.com."""
        return "@" + self.username + "@" + self.domain

    @property
    def inbox(self) -> str:
        """ActivityPub inbox URL."""
        return self.id + "/inbox"

    @property
    def outbox(self) -> str:
        """ActivityPub outbox URL."""
        return self.id + "/outbox"

    @property
    def key_id(self) -> str:
        """Key ID for HTTP Signatures."""
        return self.id + "#main-key"

    def to_activitypub(self) -> Dict[str, Any]:
        """Return ActivityPub JSON-LD representation."""
        key_document = {
            "id": self.key_id,
            "owner": self.id,
            "publicKeyPem": self.public_key.decode("utf-8"),
        }
        return {
            "@context": [
                "https://www.w3.org/ns/activitystreams",
                "https://w3id.org/security/v1",
            ],
            "type": "Person",
            "id": self.id,
            "preferredUsername": self.username,
            "name": self.display_name,
            "inbox": self.inbox,
            "outbox": self.outbox,
            "publicKey": key_document,
        }

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for storage (keys stored as UTF-8 text)."""
        return {
            "username": self.username,
            "display_name": self.display_name,
            "public_key": self.public_key.decode("utf-8"),
            "private_key": self.private_key.decode("utf-8"),
            "created_at": self.created_at,
            "domain": self.domain,
        }

    @classmethod
    def from_dict(cls, record: Dict[str, Any]) -> "Actor":
        """Deserialize from storage; created_at/domain fall back to defaults."""
        return cls(
            username=record["username"],
            display_name=record["display_name"],
            public_key=record["public_key"].encode("utf-8"),
            private_key=record["private_key"].encode("utf-8"),
            created_at=record.get("created_at", time.time()),
            domain=record.get("domain", DOMAIN),
        )

    @classmethod
    def create(cls, username: str, display_name: str = None) -> "Actor":
        """Create a new actor with freshly generated RSA keys."""
        private_pem, public_pem = _generate_keypair()
        return cls(
            username=username,
            display_name=display_name or username,
            public_key=public_pem,
            private_key=private_pem,
        )
class ActorStore:
    """
    Persistent storage for actors.

    Structure:
        store_dir/
            actors.json                  # Index of all actors
            keys/
                <username>.private.pem
                <username>.public.pem
    """

    def __init__(self, store_dir: Path | str):
        self.store_dir = Path(store_dir)
        self.store_dir.mkdir(parents=True, exist_ok=True)
        self._actors: Dict[str, Actor] = {}
        self._load()

    def _index_path(self) -> Path:
        return self.store_dir / "actors.json"

    def _load(self):
        """Load the actor index from disk, if present."""
        path = self._index_path()
        if not path.exists():
            return
        data = json.loads(path.read_text())
        loaded: Dict[str, Actor] = {}
        for username, record in data.get("actors", {}).items():
            loaded[username] = Actor.from_dict(record)
        self._actors = loaded

    def _save(self):
        """Rewrite the actor index to disk."""
        payload = {
            "version": "1.0",
            "domain": DOMAIN,
            "actors": {
                name: stored.to_dict() for name, stored in self._actors.items()
            },
        }
        with open(self._index_path(), "w") as handle:
            json.dump(payload, handle, indent=2)

    def create(self, username: str, display_name: str = None) -> Actor:
        """Create, store, and return a new actor; duplicate usernames are rejected."""
        if username in self._actors:
            raise ValueError(f"Actor {username} already exists")
        new_actor = Actor.create(username, display_name)
        self._actors[username] = new_actor
        self._save()
        return new_actor

    def get(self, username: str) -> Optional[Actor]:
        """Look up an actor by username, or None."""
        return self._actors.get(username)

    def list(self) -> list[Actor]:
        """All stored actors."""
        return list(self._actors.values())

    def __contains__(self, username: str) -> bool:
        return username in self._actors

    def __len__(self) -> int:
        return len(self._actors)

View File

@@ -0,0 +1,226 @@
# primitive/activitypub/ownership.py
"""
Ownership integration between ActivityPub and Registry.
Connects actors, activities, and assets to establish provable ownership.
"""
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional
from .actor import Actor, ActorStore
from .activity import Activity, CreateActivity, ActivityStore
from .signatures import sign_activity, verify_activity_ownership
from ..registry import Registry, Asset
@dataclass
class OwnershipRecord:
    """
    A verified ownership record linking actor to asset.

    Attributes:
        actor_handle: The actor's fediverse handle
        asset_name: Name of the owned asset
        cid: SHA-3 hash of the asset
        activity_id: ID of the Create activity establishing ownership
        verified: Whether the signature has been verified
    """
    actor_handle: str
    asset_name: str
    cid: str
    activity_id: str
    # False by default; set True only when verify_activity_ownership succeeds.
    verified: bool = False
class OwnershipManager:
    """
    Manages ownership relationships between actors and assets.

    Integrates:
    - ActorStore: Identity management
    - Registry: Asset storage
    - ActivityStore: Ownership activities
    """

    def __init__(self, base_dir: Path | str):
        self.base_dir = Path(base_dir)
        self.base_dir.mkdir(parents=True, exist_ok=True)
        # Each store persists into its own subdirectory under base_dir.
        self.actors = ActorStore(self.base_dir / "actors")
        self.activities = ActivityStore(self.base_dir / "activities")
        self.registry = Registry(self.base_dir / "registry")

    def create_actor(self, username: str, display_name: Optional[str] = None) -> Actor:
        """Create a new actor identity (with generated keys)."""
        return self.actors.create(username, display_name)

    def get_actor(self, username: str) -> Optional[Actor]:
        """Get an actor by username, or None."""
        return self.actors.get(username)

    def register_asset(
        self,
        actor: Actor,
        name: str,
        cid: str,
        url: Optional[str] = None,
        local_path: Optional[Path | str] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> tuple[Asset, Activity]:
        """
        Register an asset and establish ownership.

        Creates the asset in the registry and a signed Create activity
        proving the actor's ownership.

        Args:
            actor: The actor claiming ownership
            name: Name for the asset
            cid: SHA-3-256 hash of the content
            url: Public URL (canonical location)
            local_path: Optional local path
            tags: Optional tags
            metadata: Optional metadata

        Returns:
            Tuple of (Asset, signed CreateActivity)
        """
        # Add to registry first so the activity can reference the stored cid.
        asset = self.registry.add(
            name=name,
            cid=cid,
            url=url,
            local_path=local_path,
            tags=tags,
            metadata=metadata,
        )
        # Create the ownership claim...
        activity = CreateActivity.for_asset(
            actor=actor,
            asset_name=name,
            cid=asset.cid,
            asset_type=self._asset_type_to_ap(asset.asset_type),
            metadata=metadata,
        )
        # ...sign it with the actor's private key...
        signed_activity = sign_activity(activity, actor)
        # ...and append it to the activity log.
        self.activities.add(signed_activity)
        return asset, signed_activity

    def _asset_type_to_ap(self, asset_type: str) -> str:
        """Convert registry asset type to an ActivityPub object type."""
        type_map = {
            "image": "Image",
            "video": "Video",
            "audio": "Audio",
            "unknown": "Document",
        }
        # Anything unrecognized falls back to the generic Document type.
        return type_map.get(asset_type, "Document")

    def get_owner(self, asset_name: str) -> Optional[Actor]:
        """
        Get the owner of an asset.

        Finds the earliest Create activity for the asset and returns
        the actor if the signature is valid.
        """
        asset = self.registry.get(asset_name)
        if not asset:
            return None
        # Find Create activities for this asset
        activities = self.activities.find_by_object_hash(asset.cid)
        create_activities = [a for a in activities if a.activity_type == "Create"]
        if not create_activities:
            return None
        # Get the earliest (first owner).
        # NOTE(review): this compares `published` ISO-8601 strings
        # lexicographically, which orders correctly only while all
        # timestamps use the same UTC "...Z" format - confirm.
        earliest = min(create_activities, key=lambda a: a.published)
        # Extract username from actor_id
        # Format: https://artdag.rose-ash.com/users/{username}
        actor_id = earliest.actor_id
        if "/users/" in actor_id:
            username = actor_id.split("/users/")[-1]
            actor = self.actors.get(username)
            # Only a locally-known actor with a valid signature counts.
            if actor and verify_activity_ownership(earliest, actor):
                return actor
        return None

    def verify_ownership(self, asset_name: str, actor: Actor) -> bool:
        """
        Verify that an actor owns an asset.

        Checks for a valid signed Create activity linking the actor
        to the asset.
        """
        asset = self.registry.get(asset_name)
        if not asset:
            return False
        activities = self.activities.find_by_object_hash(asset.cid)
        for activity in activities:
            if activity.activity_type == "Create" and activity.actor_id == actor.id:
                if verify_activity_ownership(activity, actor):
                    return True
        return False

    def list_owned_assets(self, actor: Actor) -> List[Asset]:
        """List all assets owned by an actor (via their Create activities)."""
        activities = self.activities.find_by_actor(actor.id)
        owned = []
        for activity in activities:
            if activity.activity_type == "Create":
                # contentHash may be the structured dict form or a bare string.
                obj_hash = activity.object_data.get("contentHash", {})
                if isinstance(obj_hash, dict):
                    hash_value = obj_hash.get("value")
                else:
                    hash_value = obj_hash
                if hash_value:
                    asset = self.registry.find_by_hash(hash_value)
                    if asset:
                        owned.append(asset)
        return owned

    def get_ownership_records(self) -> List[OwnershipRecord]:
        """Get all ownership records (one per Create activity in the log)."""
        records = []
        for activity in self.activities.list():
            if activity.activity_type != "Create":
                continue
            # Extract actor username from the actor URL; unknown shapes
            # degrade to a placeholder handle rather than failing.
            actor_id = activity.actor_id
            username = actor_id.split("/users/")[-1] if "/users/" in actor_id else "unknown"
            actor = self.actors.get(username)
            obj_hash = activity.object_data.get("contentHash", {})
            hash_value = obj_hash.get("value") if isinstance(obj_hash, dict) else obj_hash
            records.append(OwnershipRecord(
                actor_handle=actor.handle if actor else f"@{username}@unknown",
                asset_name=activity.object_data.get("name", "unknown"),
                cid=hash_value or "unknown",
                activity_id=activity.activity_id,
                # Only verify when the actor is locally known.
                verified=verify_activity_ownership(activity, actor) if actor else False,
            ))
        return records

View File

@@ -0,0 +1,163 @@
# primitive/activitypub/signatures.py
"""
Cryptographic signatures for ActivityPub.
Uses RSA-SHA256 signatures compatible with HTTP Signatures spec
and Linked Data Signatures for ActivityPub.
"""
import base64
import hashlib
import json
import time
from typing import Any, Dict
from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import padding, rsa
from cryptography.exceptions import InvalidSignature
from .actor import Actor
from .activity import Activity
def _canonicalize(data: Dict[str, Any]) -> str:
    """
    Canonicalize JSON for signing.

    JCS-style serialization: keys sorted, no whitespace between tokens,
    so the same logical document always yields the same byte sequence.
    """
    return json.dumps(data, separators=(",", ":"), sort_keys=True)
def _hash_sha256(data: str) -> bytes:
"""Hash string with SHA-256."""
return hashlib.sha256(data.encode()).digest()
def sign_activity(activity: Activity, actor: Actor) -> Activity:
    """
    Attach a Linked Data Signature (RsaSignature2017) to an activity.

    The actor's RSA private key signs SHA-256(options) + SHA-256(document),
    both canonicalized JCS-style first.

    Args:
        activity: The activity to sign
        actor: The actor whose key signs the activity

    Returns:
        The same activity, now carrying a signature block
    """
    # Timestamp recorded inside the signature options.
    created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    signature_options = {
        "@context": "https://w3id.org/security/v1",
        "type": "RsaSignature2017",
        "creator": actor.key_id,
        "created": created,
    }
    # Serialize the activity itself without any previous signature.
    doc = activity.to_activitypub()
    doc.pop("signature", None)
    # Per Linked Data Signatures: sign hash(options) || hash(document).
    payload = _hash_sha256(_canonicalize(signature_options)) + _hash_sha256(_canonicalize(doc))
    rsa_key = serialization.load_pem_private_key(
        actor.private_key,
        password=None,
    )
    raw_signature = rsa_key.sign(
        payload,
        padding.PKCS1v15(),
        hashes.SHA256(),
    )
    activity.signature = {
        "type": "RsaSignature2017",
        "creator": actor.key_id,
        "created": created,
        "signatureValue": base64.b64encode(raw_signature).decode("utf-8"),
    }
    return activity
def verify_signature(activity: Activity, public_key_pem: bytes) -> bool:
    """
    Check an activity's Linked Data Signature against a public key.

    Args:
        activity: The activity carrying a signature block
        public_key_pem: PEM-encoded RSA public key

    Returns:
        True if the signature verifies, False otherwise
    """
    sig = activity.signature
    if not sig:
        return False
    try:
        rsa_key = serialization.load_pem_public_key(public_key_pem)
        # Rebuild the exact byte string that sign_activity() signed.
        signature_options = {
            "@context": "https://w3id.org/security/v1",
            "type": sig["type"],
            "creator": sig["creator"],
            "created": sig["created"],
        }
        doc = activity.to_activitypub()
        doc.pop("signature", None)
        payload = _hash_sha256(_canonicalize(signature_options)) + _hash_sha256(_canonicalize(doc))
        rsa_key.verify(
            base64.b64decode(sig["signatureValue"]),
            payload,
            padding.PKCS1v15(),
            hashes.SHA256(),
        )
    except (InvalidSignature, KeyError, ValueError):
        # Malformed signature block, bad key material, or failed verify.
        return False
    return True
def verify_activity_ownership(activity: Activity, actor: Actor) -> bool:
    """
    Confirm that an activity was signed by the claimed actor.

    Args:
        activity: The activity to verify
        actor: The claimed actor

    Returns:
        True if a signature exists, names this actor's key, and verifies
    """
    sig = activity.signature
    if not sig or sig.get("creator") != actor.key_id:
        return False
    # Creator matches; the cryptographic check decides the rest.
    return verify_signature(activity, actor.public_key)

View File

@@ -0,0 +1,26 @@
# artdag/analysis - Audio and video feature extraction
#
# Provides the Analysis phase of the 3-phase execution model:
# 1. ANALYZE - Extract features from inputs
# 2. PLAN - Generate execution plan with cache IDs
# 3. EXECUTE - Run steps with caching
from .schema import (
AnalysisResult,
AudioFeatures,
VideoFeatures,
BeatInfo,
EnergyEnvelope,
SpectrumBands,
)
from .analyzer import Analyzer
__all__ = [
"Analyzer",
"AnalysisResult",
"AudioFeatures",
"VideoFeatures",
"BeatInfo",
"EnergyEnvelope",
"SpectrumBands",
]

View File

@@ -0,0 +1,282 @@
# artdag/analysis/analyzer.py
"""
Main Analyzer class for the Analysis phase.
Coordinates audio and video feature extraction with caching.
"""
import json
import logging
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional
from .schema import AnalysisResult, AudioFeatures, VideoFeatures
from .audio import analyze_audio, FEATURE_ALL as AUDIO_ALL
from .video import analyze_video, FEATURE_ALL as VIDEO_ALL
logger = logging.getLogger(__name__)
class AnalysisCache:
    """
    File-backed cache of AnalysisResult objects.

    Each result is serialized as ``<cache_dir>/<cache_id>.json``.
    """
    def __init__(self, cache_dir: Path):
        # Created eagerly so later writes never fail on a missing directory.
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
    def _path_for(self, cache_id: str) -> Path:
        """Return the JSON file path backing a cache_id."""
        return self.cache_dir / f"{cache_id}.json"
    def get(self, cache_id: str) -> Optional[AnalysisResult]:
        """Load a cached result, or None if absent or unreadable."""
        entry = self._path_for(cache_id)
        if not entry.exists():
            return None
        try:
            with open(entry, "r") as fh:
                return AnalysisResult.from_dict(json.load(fh))
        except (json.JSONDecodeError, KeyError) as e:
            # A corrupt cache file is treated as a miss, not an error.
            logger.warning(f"Failed to load analysis cache {cache_id}: {e}")
            return None
    def put(self, result: AnalysisResult) -> None:
        """Write a result to disk under its cache_id."""
        with open(self._path_for(result.cache_id), "w") as fh:
            json.dump(result.to_dict(), fh, indent=2)
    def has(self, cache_id: str) -> bool:
        """True if a cache file exists for this cache_id."""
        return self._path_for(cache_id).exists()
    def remove(self, cache_id: str) -> bool:
        """Delete a cached result; returns True if a file was removed."""
        entry = self._path_for(cache_id)
        if not entry.exists():
            return False
        entry.unlink()
        return True
class Analyzer:
    """
    Analyzes media inputs to extract features.
    The Analyzer is the first phase of the 3-phase execution model.
    It extracts features from inputs that inform downstream processing.
    Results are cached (when a cache_dir is given) under a deterministic
    cache_id derived from the input hash and the sorted feature list.
    Example:
        analyzer = Analyzer(cache_dir=Path("./analysis_cache"))
        # Analyze a music file for beats
        result = analyzer.analyze(
            input_path=Path("/path/to/music.mp3"),
            input_hash="abc123...",
            features=["beats", "energy"]
        )
        print(f"Tempo: {result.tempo} BPM")
        print(f"Beats: {result.beat_times}")
    """
    def __init__(
        self,
        cache_dir: Optional[Path] = None,
        content_cache: Optional["Cache"] = None,  # artdag.Cache for input lookup
    ):
        """
        Initialize the Analyzer.
        Args:
            cache_dir: Directory for analysis cache. If None, no caching.
            content_cache: artdag Cache for looking up inputs by hash
        """
        # No cache_dir means every analyze() call recomputes from scratch.
        self.cache = AnalysisCache(cache_dir) if cache_dir else None
        self.content_cache = content_cache
    def get_input_path(self, input_hash: str, input_path: Optional[Path] = None) -> Path:
        """
        Resolve input to a file path.
        Args:
            input_hash: Content hash of the input
            input_path: Optional direct path to file
        Returns:
            Path to the input file
        Raises:
            ValueError: If input cannot be resolved
        """
        # An explicit, existing path wins over the content cache lookup.
        if input_path and input_path.exists():
            return input_path
        if self.content_cache:
            entry = self.content_cache.get(input_hash)
            if entry:
                return Path(entry.output_path)
        raise ValueError(f"Cannot resolve input {input_hash}: no path provided and not in cache")
    def analyze(
        self,
        input_hash: str,
        features: List[str],
        input_path: Optional[Path] = None,
        media_type: Optional[str] = None,
    ) -> AnalysisResult:
        """
        Analyze an input file and extract features.
        Args:
            input_hash: Content hash of the input (for cache key)
            features: List of features to extract:
                Audio: "beats", "tempo", "energy", "spectrum", "onsets"
                Video: "metadata", "motion_tempo", "scene_changes"
                Meta: "all" (extracts all relevant features)
            input_path: Optional direct path to file
            media_type: Optional hint ("audio", "video", or None for auto-detect)
        Returns:
            AnalysisResult with extracted features
        """
        # Compute cache ID
        # A throwaway AnalysisResult is built only so its __post_init__
        # derives the deterministic cache_id from (input_hash, features).
        temp_result = AnalysisResult(
            input_hash=input_hash,
            features_requested=sorted(features),
        )
        cache_id = temp_result.cache_id
        # Check cache
        if self.cache and self.cache.has(cache_id):
            cached = self.cache.get(cache_id)
            if cached:
                logger.info(f"Analysis cache hit: {cache_id[:16]}...")
                return cached
        # Resolve input path
        path = self.get_input_path(input_hash, input_path)
        logger.info(f"Analyzing {path} for features: {features}")
        # Detect media type if not specified
        if media_type is None:
            media_type = self._detect_media_type(path)
        # Extract features
        audio_features = None
        video_features = None
        # Normalize features
        if "all" in features:
            audio_features_list = [AUDIO_ALL]
            video_features_list = [VIDEO_ALL]
        else:
            audio_features_list = [f for f in features if f in ("beats", "tempo", "energy", "spectrum", "onsets")]
            video_features_list = [f for f in features if f in ("metadata", "motion_tempo", "scene_changes")]
        # Audio features also run on video files (i.e. their soundtrack).
        if media_type in ("audio", "video") and audio_features_list:
            try:
                audio_features = analyze_audio(path, features=audio_features_list)
            except Exception as e:
                # Best-effort: a failed extractor degrades to None features.
                logger.warning(f"Audio analysis failed: {e}")
        if media_type == "video" and video_features_list:
            try:
                video_features = analyze_video(path, features=video_features_list)
            except Exception as e:
                logger.warning(f"Video analysis failed: {e}")
        result = AnalysisResult(
            input_hash=input_hash,
            features_requested=sorted(features),
            audio=audio_features,
            video=video_features,
            analyzed_at=datetime.now(timezone.utc).isoformat(),
        )
        # Cache result
        if self.cache:
            self.cache.put(result)
        return result
    def analyze_multiple(
        self,
        inputs: Dict[str, Path],
        features: List[str],
    ) -> Dict[str, AnalysisResult]:
        """
        Analyze multiple inputs.
        Args:
            inputs: Dict mapping input_hash to file path
            features: Features to extract from all inputs
        Returns:
            Dict mapping input_hash to AnalysisResult
        Raises:
            Exception: Re-raises the first per-input analysis failure.
        """
        results = {}
        for input_hash, input_path in inputs.items():
            try:
                results[input_hash] = self.analyze(
                    input_hash=input_hash,
                    features=features,
                    input_path=input_path,
                )
            except Exception as e:
                # Fail fast: one bad input aborts the whole batch.
                logger.error(f"Analysis failed for {input_hash}: {e}")
                raise
        return results
    def _detect_media_type(self, path: Path) -> str:
        """
        Detect if file is audio or video.
        Args:
            path: Path to media file
        Returns:
            "audio", "video", or "unknown"
        """
        import subprocess
        import json
        cmd = [
            "ffprobe", "-v", "quiet",
            "-print_format", "json",
            "-show_streams",
            str(path)
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            data = json.loads(result.stdout)
            streams = data.get("streams", [])
            has_video = any(s.get("codec_type") == "video" for s in streams)
            has_audio = any(s.get("codec_type") == "audio" for s in streams)
            # "video" wins when both stream kinds are present.
            if has_video:
                return "video"
            elif has_audio:
                return "audio"
            else:
                return "unknown"
        except (subprocess.CalledProcessError, json.JSONDecodeError):
            # Fall back to extension-based detection
            ext = path.suffix.lower()
            if ext in (".mp4", ".mov", ".avi", ".mkv", ".webm"):
                return "video"
            elif ext in (".mp3", ".wav", ".flac", ".ogg", ".m4a", ".aac"):
                return "audio"
            return "unknown"

View File

@@ -0,0 +1,336 @@
# artdag/analysis/audio.py
"""
Audio feature extraction.
Uses librosa for beat detection, energy analysis, and spectral features.
Falls back to basic ffprobe if librosa is not available.
"""
import json
import logging
import subprocess
from pathlib import Path
from typing import List, Optional, Tuple
from .schema import AudioFeatures, BeatInfo, EnergyEnvelope, SpectrumBands
logger = logging.getLogger(__name__)
# Feature names for requesting specific analysis
FEATURE_BEATS = "beats"  # beat times + tempo (librosa)
FEATURE_TEMPO = "tempo"  # handled together with beats in analyze_audio()
FEATURE_ENERGY = "energy"  # RMS loudness envelope
FEATURE_SPECTRUM = "spectrum"  # bass/mid/high band envelopes
FEATURE_ONSETS = "onsets"  # note/sound onset times
FEATURE_ALL = "all"  # expands to every feature above
def _get_audio_info_ffprobe(path: Path) -> Tuple[float, int, int]:
    """Probe the first audio stream; return (duration, sample_rate, channels)."""
    probe_cmd = [
        "ffprobe", "-v", "quiet",
        "-print_format", "json",
        "-show_streams",
        "-select_streams", "a:0",
        str(path)
    ]
    try:
        proc = subprocess.run(probe_cmd, capture_output=True, text=True, check=True)
        info = json.loads(proc.stdout)
        streams = info.get("streams")
        if not streams:
            raise ValueError("No audio stream found")
        first = streams[0]
        return (
            float(first.get("duration", 0)),
            int(first.get("sample_rate", 44100)),
            int(first.get("channels", 2)),
        )
    except (subprocess.CalledProcessError, json.JSONDecodeError, KeyError) as e:
        logger.warning(f"ffprobe failed: {e}")
        raise ValueError(f"Could not read audio info: {e}")
def _extract_audio_to_wav(path: Path, duration: Optional[float] = None) -> Path:
    """
    Extract audio to a temporary mono 22050 Hz WAV file for librosa processing.

    Args:
        path: Source media file (audio or video).
        duration: Optional cap on extracted length in seconds.

    Returns:
        Path to the temporary WAV file; the caller is responsible for
        deleting it when done.

    Raises:
        ValueError: If ffmpeg fails to extract the audio.
    """
    import os
    import tempfile
    # mkstemp instead of the deprecated, race-prone tempfile.mktemp():
    # the file is created atomically; ffmpeg overwrites it via "-y".
    fd, tmp_name = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    wav_path = Path(tmp_name)
    cmd = ["ffmpeg", "-y", "-i", str(path)]
    if duration:
        cmd.extend(["-t", str(duration)])
    cmd.extend([
        "-vn",  # No video
        "-acodec", "pcm_s16le",
        "-ar", "22050",  # Resample to 22050 Hz for librosa
        "-ac", "1",  # Mono
        str(wav_path)
    ])
    try:
        subprocess.run(cmd, capture_output=True, check=True)
        return wav_path
    except subprocess.CalledProcessError as e:
        # Don't leak the temp file when extraction fails.
        if wav_path.exists():
            wav_path.unlink()
        logger.error(f"Audio extraction failed: {e.stderr}")
        raise ValueError(f"Could not extract audio: {e}")
def analyze_beats(path: Path, sample_rate: int = 22050) -> BeatInfo:
    """
    Detect beats and tempo using librosa.
    Args:
        path: Path to audio file (or pre-extracted WAV)
        sample_rate: Sample rate for analysis
    Returns:
        BeatInfo with beat times, tempo, and confidence
    Raises:
        ImportError: If librosa is not installed.
    """
    try:
        import librosa
    except ImportError:
        raise ImportError("librosa required for beat detection. Install with: pip install librosa")
    # Load audio
    y, sr = librosa.load(str(path), sr=sample_rate, mono=True)
    # Detect tempo and beats
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    # Convert frames to times
    beat_times = librosa.frames_to_time(beat_frames, sr=sr).tolist()
    # Estimate confidence from onset strength consistency:
    # mean onset strength at the detected beats relative to the global peak.
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    beat_strength = onset_env[beat_frames] if len(beat_frames) > 0 else []
    confidence = float(beat_strength.mean() / onset_env.max()) if len(beat_strength) > 0 and onset_env.max() > 0 else 0.5
    # Detect downbeats (first beat of each bar)
    # Use beat phase to estimate bar positions
    downbeat_times = None
    if len(beat_times) >= 4:
        # Assume 4/4 time signature, downbeats every 4 beats
        downbeat_times = [beat_times[i] for i in range(0, len(beat_times), 4)]
    # NOTE(review): the tempo expression handles both a plain scalar and an
    # array-like return from beat_track (presumably varies by librosa
    # version — confirm); falls back to 120 BPM when the array is empty.
    return BeatInfo(
        beat_times=beat_times,
        tempo=float(tempo) if hasattr(tempo, '__float__') else float(tempo[0]) if len(tempo) > 0 else 120.0,
        confidence=min(1.0, max(0.0, confidence)),
        downbeat_times=downbeat_times,
        time_signature=4,
    )
def analyze_energy(path: Path, window_ms: float = 50.0, sample_rate: int = 22050) -> EnergyEnvelope:
    """
    Compute an RMS loudness envelope, normalized to [0, 1].

    Args:
        path: Path to audio file
        window_ms: Analysis window size in milliseconds
        sample_rate: Sample rate for analysis

    Returns:
        EnergyEnvelope with times and normalized values

    Raises:
        ImportError: If librosa or numpy is missing.
    """
    try:
        import librosa
        import numpy as np
    except ImportError:
        raise ImportError("librosa and numpy required. Install with: pip install librosa numpy")
    samples, sr = librosa.load(str(path), sr=sample_rate, mono=True)
    # The requested window size (ms) determines the RMS hop in samples.
    hop = int(sr * window_ms / 1000)
    envelope = librosa.feature.rms(y=samples, hop_length=hop)[0]
    # Scale so the loudest frame is 1.0; leave untouched if fully silent.
    peak = envelope.max()
    if peak > 0:
        envelope = envelope / peak
    frame_times = librosa.frames_to_time(np.arange(len(envelope)), sr=sr, hop_length=hop)
    return EnergyEnvelope(
        times=frame_times.tolist(),
        values=envelope.tolist(),
        window_ms=window_ms,
    )
def analyze_spectrum(
    path: Path,
    band_ranges: Optional[dict] = None,
    window_ms: float = 50.0,
    sample_rate: int = 22050
) -> SpectrumBands:
    """
    Extract frequency band envelopes.
    Args:
        path: Path to audio file
        band_ranges: Dict mapping band name to (low_hz, high_hz)
        window_ms: Analysis window size
        sample_rate: Sample rate
    Returns:
        SpectrumBands with bass, mid, high envelopes
    Raises:
        ImportError: If librosa or numpy is missing.
    """
    try:
        import librosa
        import numpy as np
    except ImportError:
        raise ImportError("librosa and numpy required")
    if band_ranges is None:
        band_ranges = {
            "bass": (20, 200),
            "mid": (200, 2000),
            "high": (2000, 20000),
        }
    y, sr = librosa.load(str(path), sr=sample_rate, mono=True)
    # Hop length in samples, derived from the requested window size.
    hop_length = int(sr * window_ms / 1000)
    # Compute STFT
    n_fft = 2048
    stft = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
    # Frequency bins
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
    def band_energy(low_hz: float, high_hz: float) -> List[float]:
        """Sum energy in frequency band."""
        # Boolean mask selecting STFT rows whose bin falls in [low_hz, high_hz].
        mask = (freqs >= low_hz) & (freqs <= high_hz)
        if not mask.any():
            return [0.0] * stft.shape[1]
        band = stft[mask, :].sum(axis=0)
        # Normalize
        # Each band is scaled independently to a 0-1 range.
        band_max = band.max()
        if band_max > 0:
            band = band / band_max
        return band.tolist()
    times = librosa.frames_to_time(np.arange(stft.shape[1]), sr=sr, hop_length=hop_length)
    return SpectrumBands(
        bass=band_energy(*band_ranges["bass"]),
        mid=band_energy(*band_ranges["mid"]),
        high=band_energy(*band_ranges["high"]),
        times=times.tolist(),
        band_ranges=band_ranges,
    )
def analyze_onsets(path: Path, sample_rate: int = 22050) -> List[float]:
    """
    Detect onset times (note/sound starts).

    Args:
        path: Path to audio file
        sample_rate: Sample rate

    Returns:
        List of onset times in seconds

    Raises:
        ImportError: If librosa is not installed.
    """
    try:
        import librosa
    except ImportError:
        raise ImportError("librosa required")
    samples, sr = librosa.load(str(path), sr=sample_rate, mono=True)
    # Frame indices of detected onsets, converted to seconds.
    frames = librosa.onset.onset_detect(y=samples, sr=sr)
    return librosa.frames_to_time(frames, sr=sr).tolist()
def analyze_audio(
    path: Path,
    features: Optional[List[str]] = None,
) -> AudioFeatures:
    """
    Extract audio features from file.

    Args:
        path: Path to audio/video file
        features: List of features to extract. Options:
            - "beats": Beat detection (tempo, beat times)
            - "energy": Loudness envelope
            - "spectrum": Frequency band envelopes
            - "onsets": Note onset times
            - "all": All features

    Returns:
        AudioFeatures with requested analysis
    """
    advanced = [FEATURE_BEATS, FEATURE_ENERGY, FEATURE_SPECTRUM, FEATURE_ONSETS]
    if features is None:
        features = [FEATURE_ALL]
    if FEATURE_ALL in features:
        features = list(advanced)
    # Basic stream metadata needs only ffprobe, never librosa.
    duration, sample_rate, channels = _get_audio_info_ffprobe(path)
    result = AudioFeatures(
        duration=duration,
        sample_rate=sample_rate,
        channels=channels,
    )
    try:
        import librosa  # noqa: F401
    except ImportError:
        if any(f in features for f in advanced):
            logger.warning("librosa not available, skipping advanced audio features")
        return result
    wav_path = None
    try:
        # librosa works best on a plain mono WAV; extract once, reuse for all.
        wav_path = _extract_audio_to_wav(path)
        if FEATURE_BEATS in features or FEATURE_TEMPO in features:
            try:
                result.beats = analyze_beats(wav_path)
            except Exception as e:
                logger.warning(f"Beat detection failed: {e}")
        if FEATURE_ENERGY in features:
            try:
                result.energy = analyze_energy(wav_path)
            except Exception as e:
                logger.warning(f"Energy analysis failed: {e}")
        if FEATURE_SPECTRUM in features:
            try:
                result.spectrum = analyze_spectrum(wav_path)
            except Exception as e:
                logger.warning(f"Spectrum analysis failed: {e}")
        if FEATURE_ONSETS in features:
            try:
                result.onsets = analyze_onsets(wav_path)
            except Exception as e:
                logger.warning(f"Onset detection failed: {e}")
    finally:
        # Clean up the temporary WAV even when an extractor blows up.
        if wav_path and wav_path.exists():
            wav_path.unlink()
    return result

View File

@@ -0,0 +1,352 @@
# artdag/analysis/schema.py
"""
Data structures for analysis results.
Analysis extracts features from input media that inform downstream processing.
Results are cached by: analysis_cache_id = SHA3-256(input_hash + sorted(features))
"""
import hashlib
import json
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple
def _stable_hash(data: Any, algorithm: str = "sha3_256") -> str:
"""Create stable hash from arbitrary data."""
json_str = json.dumps(data, sort_keys=True, separators=(",", ":"))
hasher = hashlib.new(algorithm)
hasher.update(json_str.encode())
return hasher.hexdigest()
@dataclass
class BeatInfo:
    """
    Beat detection results.

    Attributes:
        beat_times: Beat positions in seconds
        tempo: Estimated tempo in BPM
        confidence: Tempo detection confidence (0-1)
        downbeat_times: First beat of each bar (if detected)
        time_signature: Detected or assumed time signature (e.g., 4)
    """
    beat_times: List[float]
    tempo: float
    confidence: float = 1.0
    downbeat_times: Optional[List[float]] = None
    time_signature: int = 4
    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict."""
        out: Dict[str, Any] = {}
        out["beat_times"] = self.beat_times
        out["tempo"] = self.tempo
        out["confidence"] = self.confidence
        out["downbeat_times"] = self.downbeat_times
        out["time_signature"] = self.time_signature
        return out
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "BeatInfo":
        """Rebuild from a dict produced by to_dict(); missing keys get defaults."""
        return cls(
            data["beat_times"],
            data["tempo"],
            confidence=data.get("confidence", 1.0),
            downbeat_times=data.get("downbeat_times"),
            time_signature=data.get("time_signature", 4),
        )
@dataclass
class EnergyEnvelope:
    """
    Energy (loudness) over time.

    Attributes:
        times: Time points in seconds
        values: Energy values (0-1, normalized)
        window_ms: Analysis window size in milliseconds
    """
    times: List[float]
    values: List[float]
    window_ms: float = 50.0
    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict."""
        return dict(times=self.times, values=self.values, window_ms=self.window_ms)
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "EnergyEnvelope":
        """Rebuild from a dict produced by to_dict()."""
        return cls(
            data["times"],
            data["values"],
            window_ms=data.get("window_ms", 50.0),
        )
    def at_time(self, t: float) -> float:
        """Linearly interpolate the envelope value at time ``t`` seconds."""
        if not self.times:
            return 0.0
        # Clamp to the first/last sample outside the covered range.
        if t <= self.times[0]:
            return self.values[0]
        if t >= self.times[-1]:
            return self.values[-1]
        # Bisect for the segment [lo, lo+1] that brackets t.
        lo, hi = 0, len(self.times) - 1
        while hi - lo > 1:
            mid = (lo + hi) // 2
            if t < self.times[mid]:
                hi = mid
            else:
                lo = mid
        t0, t1 = self.times[lo], self.times[hi]
        v0, v1 = self.values[lo], self.values[hi]
        if t1 == t0:
            return v0
        frac = (t - t0) / (t1 - t0)
        return v0 + frac * (v1 - v0)
@dataclass
class SpectrumBands:
    """
    Frequency band envelopes over time.

    Attributes:
        bass: Low frequency envelope (20-200 Hz typical)
        mid: Mid frequency envelope (200-2000 Hz typical)
        high: High frequency envelope (2000-20000 Hz typical)
        times: Time points in seconds
        band_ranges: Frequency ranges for each band in Hz
    """
    bass: List[float]
    mid: List[float]
    high: List[float]
    times: List[float]
    band_ranges: Dict[str, Tuple[float, float]] = field(default_factory=lambda: {
        "bass": (20, 200),
        "mid": (200, 2000),
        "high": (2000, 20000),
    })
    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (tuples become lists in JSON)."""
        return {
            "bass": self.bass,
            "mid": self.mid,
            "high": self.high,
            "times": self.times,
            "band_ranges": self.band_ranges,
        }
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SpectrumBands":
        """Rebuild from a dict produced by to_dict().

        JSON round-trips tuples as lists, so each band range is normalized
        back to a tuple to honor the declared Dict[str, Tuple[float, float]].
        """
        band_ranges = data.get("band_ranges", {
            "bass": (20, 200),
            "mid": (200, 2000),
            "high": (2000, 20000),
        })
        band_ranges = {name: tuple(rng) for name, rng in band_ranges.items()}
        return cls(
            bass=data["bass"],
            mid=data["mid"],
            high=data["high"],
            times=data["times"],
            band_ranges=band_ranges,
        )
@dataclass
class AudioFeatures:
    """
    All extracted audio features.

    Attributes:
        duration: Audio duration in seconds
        sample_rate: Sample rate in Hz
        channels: Number of audio channels
        beats: Beat detection results
        energy: Energy envelope
        spectrum: Frequency band envelopes
        onsets: Note/sound onset times
    """
    duration: float
    sample_rate: int
    channels: int
    beats: Optional[BeatInfo] = None
    energy: Optional[EnergyEnvelope] = None
    spectrum: Optional[SpectrumBands] = None
    onsets: Optional[List[float]] = None
    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict; absent features become None."""
        beats = self.beats.to_dict() if self.beats else None
        energy = self.energy.to_dict() if self.energy else None
        spectrum = self.spectrum.to_dict() if self.spectrum else None
        return {
            "duration": self.duration,
            "sample_rate": self.sample_rate,
            "channels": self.channels,
            "beats": beats,
            "energy": energy,
            "spectrum": spectrum,
            "onsets": self.onsets,
        }
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "AudioFeatures":
        """Rebuild from a dict produced by to_dict()."""
        beats = data.get("beats")
        energy = data.get("energy")
        spectrum = data.get("spectrum")
        return cls(
            duration=data["duration"],
            sample_rate=data["sample_rate"],
            channels=data["channels"],
            beats=BeatInfo.from_dict(beats) if beats else None,
            energy=EnergyEnvelope.from_dict(energy) if energy else None,
            spectrum=SpectrumBands.from_dict(spectrum) if spectrum else None,
            onsets=data.get("onsets"),
        )
@dataclass
class VideoFeatures:
    """
    Extracted video features.

    Attributes:
        duration: Video duration in seconds
        frame_rate: Frames per second
        width: Frame width in pixels
        height: Frame height in pixels
        codec: Video codec name
        motion_tempo: Estimated tempo from motion analysis (optional)
        scene_changes: Times of detected scene changes
    """
    duration: float
    frame_rate: float
    width: int
    height: int
    codec: str = ""
    motion_tempo: Optional[float] = None
    scene_changes: Optional[List[float]] = None
    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict."""
        out: Dict[str, Any] = {}
        out["duration"] = self.duration
        out["frame_rate"] = self.frame_rate
        out["width"] = self.width
        out["height"] = self.height
        out["codec"] = self.codec
        out["motion_tempo"] = self.motion_tempo
        out["scene_changes"] = self.scene_changes
        return out
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "VideoFeatures":
        """Rebuild from a dict produced by to_dict(); missing keys get defaults."""
        return cls(
            data["duration"],
            data["frame_rate"],
            data["width"],
            data["height"],
            codec=data.get("codec", ""),
            motion_tempo=data.get("motion_tempo"),
            scene_changes=data.get("scene_changes"),
        )
@dataclass
class AnalysisResult:
    """
    Complete analysis result for an input.

    Combines audio and video features with metadata for caching.

    Attributes:
        input_hash: Content hash of the analyzed input
        features_requested: List of features that were requested
        audio: Audio features (if input has audio)
        video: Video features (if input has video)
        cache_id: Computed cache ID for this analysis
        analyzed_at: Timestamp of analysis
    """
    input_hash: str
    features_requested: List[str]
    audio: Optional[AudioFeatures] = None
    video: Optional[VideoFeatures] = None
    cache_id: Optional[str] = None
    analyzed_at: Optional[str] = None
    def __post_init__(self):
        """Fill in cache_id when the caller did not supply one."""
        if self.cache_id is None:
            self.cache_id = self._compute_cache_id()
    def _compute_cache_id(self) -> str:
        """
        Derive the deterministic cache key for this analysis.

        cache_id = SHA3-256(input_hash + sorted(features_requested))
        """
        return _stable_hash({
            "input_hash": self.input_hash,
            "features": sorted(self.features_requested),
        })
    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict."""
        audio = self.audio.to_dict() if self.audio else None
        video = self.video.to_dict() if self.video else None
        return {
            "input_hash": self.input_hash,
            "features_requested": self.features_requested,
            "audio": audio,
            "video": video,
            "cache_id": self.cache_id,
            "analyzed_at": self.analyzed_at,
        }
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "AnalysisResult":
        """Rebuild from a dict produced by to_dict()."""
        audio = data.get("audio")
        video = data.get("video")
        return cls(
            input_hash=data["input_hash"],
            features_requested=data["features_requested"],
            audio=AudioFeatures.from_dict(audio) if audio else None,
            video=VideoFeatures.from_dict(video) if video else None,
            cache_id=data.get("cache_id"),
            analyzed_at=data.get("analyzed_at"),
        )
    def to_json(self) -> str:
        """Serialize to JSON string."""
        return json.dumps(self.to_dict(), indent=2)
    @classmethod
    def from_json(cls, json_str: str) -> "AnalysisResult":
        """Deserialize from JSON string."""
        return cls.from_dict(json.loads(json_str))
    # Convenience accessors
    @property
    def tempo(self) -> Optional[float]:
        """Tempo in BPM, if beats were analyzed."""
        if self.audio and self.audio.beats:
            return self.audio.beats.tempo
        return None
    @property
    def beat_times(self) -> Optional[List[float]]:
        """Beat positions in seconds, if beats were analyzed."""
        if self.audio and self.audio.beats:
            return self.audio.beats.beat_times
        return None
    @property
    def downbeat_times(self) -> Optional[List[float]]:
        """Downbeat positions in seconds, if analyzed."""
        if self.audio and self.audio.beats:
            return self.audio.beats.downbeat_times
        return None
    @property
    def duration(self) -> float:
        """Duration in seconds, preferring video over audio; 0.0 if neither."""
        if self.video:
            return self.video.duration
        if self.audio:
            return self.audio.duration
        return 0.0
    @property
    def dimensions(self) -> Optional[Tuple[int, int]]:
        """(width, height) of the video, or None without video features."""
        return (self.video.width, self.video.height) if self.video else None

View File

@@ -0,0 +1,266 @@
# artdag/analysis/video.py
"""
Video feature extraction.
Uses ffprobe for basic metadata and optional OpenCV for motion analysis.
"""
import json
import logging
import subprocess
from fractions import Fraction
from pathlib import Path
from typing import List, Optional
from .schema import VideoFeatures
logger = logging.getLogger(__name__)
# Feature names
FEATURE_METADATA = "metadata"  # ffprobe stream/container metadata (baseline)
FEATURE_MOTION_TEMPO = "motion_tempo"  # BPM estimate from frame differences (needs OpenCV)
FEATURE_SCENE_CHANGES = "scene_changes"  # ffmpeg scene-change timestamps
FEATURE_ALL = "all"  # expands to all of the above
def _parse_frame_rate(rate_str: str) -> float:
"""Parse frame rate string like '30000/1001' or '30'."""
try:
if "/" in rate_str:
frac = Fraction(rate_str)
return float(frac)
return float(rate_str)
except (ValueError, ZeroDivisionError):
return 30.0 # Default
def analyze_metadata(path: Path) -> VideoFeatures:
    """
    Extract basic video metadata (duration, fps, size, codec) via ffprobe.

    Args:
        path: Path to video file

    Returns:
        VideoFeatures with basic metadata

    Raises:
        ValueError: If ffprobe fails or no video stream is present.
    """
    probe_cmd = [
        "ffprobe", "-v", "quiet",
        "-print_format", "json",
        "-show_streams",
        "-show_format",
        "-select_streams", "v:0",
        str(path)
    ]
    try:
        proc = subprocess.run(probe_cmd, capture_output=True, text=True, check=True)
        info = json.loads(proc.stdout)
    except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
        raise ValueError(f"Could not read video info: {e}")
    streams = info.get("streams")
    if not streams:
        raise ValueError("No video stream found")
    stream = streams[0]
    container = info.get("format", {})
    # Prefer the container duration; some streams omit their own.
    duration = float(container.get("duration", stream.get("duration", 0)))
    return VideoFeatures(
        duration=duration,
        frame_rate=_parse_frame_rate(stream.get("avg_frame_rate", "30")),
        width=int(stream.get("width", 0)),
        height=int(stream.get("height", 0)),
        codec=stream.get("codec_name", ""),
    )
def analyze_scene_changes(path: Path, threshold: float = 0.3) -> List[float]:
    """
    Detect scene changes using ffmpeg's scene filter.

    Args:
        path: Path to video file
        threshold: Scene change threshold (0-1, lower = more sensitive)

    Returns:
        List of scene change times in seconds (empty on failure)
    """
    cmd = [
        "ffmpeg", "-i", str(path),
        "-vf", f"select='gt(scene,{threshold})',showinfo",
        "-f", "null", "-"
    ]
    try:
        # showinfo logs to stderr; the exit status is deliberately ignored
        # since partial output can still contain usable pts_time entries.
        result = subprocess.run(cmd, capture_output=True, text=True)
        stderr = result.stderr
    except (OSError, subprocess.SubprocessError) as e:
        # OSError covers a missing ffmpeg binary. Catching only
        # CalledProcessError was dead code: run() without check=True
        # never raises it.
        logger.warning(f"Scene detection failed: {e}")
        return []
    # Parse "pts_time:<seconds>" tokens from the showinfo log lines.
    scene_times = []
    for line in stderr.split("\n"):
        if "pts_time:" not in line:
            continue
        for part in line.split():
            if part.startswith("pts_time:"):
                try:
                    scene_times.append(float(part.split(":", 1)[1]))
                except ValueError:
                    pass  # malformed token; keep scanning other lines
                break
    return scene_times
def analyze_motion_tempo(path: Path, sample_duration: float = 30.0) -> Optional[float]:
    """
    Estimate tempo from video motion periodicity.
    Analyzes optical flow or frame differences to detect rhythmic motion.
    This is useful for matching video speed to audio tempo.
    Args:
        path: Path to video file
        sample_duration: Duration to analyze (seconds)
    Returns:
        Estimated motion tempo in BPM, or None if not detectable
        (OpenCV missing, unreadable video, too few frames, or no
        significant periodicity in the 30-200 BPM range).
    """
    try:
        import cv2
        import numpy as np
    except ImportError:
        logger.warning("OpenCV not available, skipping motion tempo analysis")
        return None
    cap = cv2.VideoCapture(str(path))
    if not cap.isOpened():
        logger.warning(f"Could not open video: {path}")
        return None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            # Some containers report 0/NaN fps; assume a common default.
            fps = 30.0
        max_frames = int(sample_duration * fps)
        frame_diffs = []
        prev_gray = None
        frame_count = 0
        while frame_count < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            # Convert to grayscale and resize for speed
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            gray = cv2.resize(gray, (160, 90))
            if prev_gray is not None:
                # Calculate frame difference
                # Mean absolute pixel difference is the per-frame motion signal.
                diff = cv2.absdiff(gray, prev_gray)
                frame_diffs.append(np.mean(diff))
            prev_gray = gray
            frame_count += 1
        if len(frame_diffs) < 60:  # Need at least 2 seconds at 30fps
            return None
        # Convert to numpy array
        motion = np.array(frame_diffs)
        # Normalize
        # Zero-mean, unit-variance so autocorrelation peaks are comparable.
        motion = motion - motion.mean()
        if motion.std() > 0:
            motion = motion / motion.std()
        # Autocorrelation to find periodicity
        n = len(motion)
        acf = np.correlate(motion, motion, mode="full")[n-1:]
        acf = acf / acf[0]  # Normalize
        # Find peaks in autocorrelation (potential beat periods)
        # Look for periods between 0.3s (200 BPM) and 2s (30 BPM)
        min_lag = int(0.3 * fps)
        max_lag = min(int(2.0 * fps), len(acf) - 1)
        if max_lag <= min_lag:
            return None
        # Find the highest peak in the valid range
        search_range = acf[min_lag:max_lag]
        if len(search_range) == 0:
            return None
        peak_idx = np.argmax(search_range) + min_lag
        peak_value = acf[peak_idx]
        # Only report if peak is significant
        if peak_value < 0.1:
            return None
        # Convert lag to BPM
        period_seconds = peak_idx / fps
        bpm = 60.0 / period_seconds
        # Sanity check
        if 30 <= bpm <= 200:
            return round(bpm, 1)
        return None
    finally:
        # Always release the capture handle, even on early return.
        cap.release()
def analyze_video(
    path: Path,
    features: Optional[List[str]] = None,
) -> VideoFeatures:
    """
    Extract video features from file.

    Args:
        path: Path to video file
        features: List of features to extract. Options:
            - "metadata": Basic video info (always included)
            - "motion_tempo": Estimated tempo from motion
            - "scene_changes": Scene change detection
            - "all": All features

    Returns:
        VideoFeatures with requested analysis
    """
    requested = [FEATURE_METADATA] if features is None else features
    if FEATURE_ALL in requested:
        requested = [FEATURE_METADATA, FEATURE_MOTION_TEMPO, FEATURE_SCENE_CHANGES]
    # Metadata extraction is unconditional; the optional analyses below only
    # fill in extra fields and never abort the whole analysis on failure.
    extracted = analyze_metadata(path)
    if FEATURE_MOTION_TEMPO in requested:
        try:
            extracted.motion_tempo = analyze_motion_tempo(path)
        except Exception as e:
            logger.warning(f"Motion tempo analysis failed: {e}")
    if FEATURE_SCENE_CHANGES in requested:
        try:
            extracted.scene_changes = analyze_scene_changes(path)
        except Exception as e:
            logger.warning(f"Scene change detection failed: {e}")
    return extracted

464
core/artdag/cache.py Normal file
View File

@@ -0,0 +1,464 @@
# primitive/cache.py
"""
Content-addressed file cache for node outputs.
Each node's output is stored at: cache_dir / node_id / output_file
This enables automatic reuse when the same operation is requested.
"""
import json
import logging
import shutil
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional
logger = logging.getLogger(__name__)
def _file_hash(path: Path, algorithm: str = "sha3_256") -> str:
"""
Compute content hash of a file.
Uses SHA-3 (Keccak) by default for quantum resistance.
"""
import hashlib
hasher = hashlib.new(algorithm)
with open(path, "rb") as f:
for chunk in iter(lambda: f.read(65536), b""):
hasher.update(chunk)
return hasher.hexdigest()
@dataclass
class CacheEntry:
    """Metadata describing one cached node output."""
    node_id: str
    output_path: Path
    created_at: float
    size_bytes: int
    node_type: str
    cid: str = ""  # Content identifier (IPFS CID or local hash)
    execution_time: float = 0.0

    def to_dict(self) -> Dict:
        """Serialize to a JSON-compatible dict (the path becomes a string)."""
        return {
            "node_id": self.node_id,
            "output_path": str(self.output_path),
            "created_at": self.created_at,
            "size_bytes": self.size_bytes,
            "node_type": self.node_type,
            "cid": self.cid,
            "execution_time": self.execution_time,
        }

    @classmethod
    def from_dict(cls, data: Dict) -> "CacheEntry":
        """Deserialize; accepts legacy "content_hash" key in place of "cid"."""
        return cls(
            node_id=data["node_id"],
            output_path=Path(data["output_path"]),
            created_at=data["created_at"],
            size_bytes=data["size_bytes"],
            node_type=data["node_type"],
            cid=data.get("cid") or data.get("content_hash", ""),
            execution_time=data.get("execution_time", 0.0),
        )
@dataclass
class CacheStats:
    """Running statistics about cache usage."""
    total_entries: int = 0
    total_size_bytes: int = 0
    hits: int = 0
    misses: int = 0
    hit_rate: float = 0.0

    def record_hit(self):
        """Count one cache hit and refresh the hit rate."""
        self.hits += 1
        self._update_rate()

    def record_miss(self):
        """Count one cache miss and refresh the hit rate."""
        self.misses += 1
        self._update_rate()

    def _update_rate(self):
        # Guard against division by zero before any lookups have happened.
        attempts = self.hits + self.misses
        self.hit_rate = (self.hits / attempts) if attempts else 0.0
class Cache:
    """
    Content-addressed file cache.

    The filesystem IS the index - no JSON index files needed.
    Each node's hash is its directory name.

    Structure:
        cache_dir/
            <hash>/
                output.ext      # Actual output file
                metadata.json   # Per-node metadata (optional)
    """

    def __init__(self, cache_dir: Path | str):
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        # In-process hit/miss counters only; sizes and entry counts are
        # always recomputed from disk (see get_stats).
        self.stats = CacheStats()

    def _node_dir(self, node_id: str) -> Path:
        """Get the cache directory for a node."""
        return self.cache_dir / node_id

    def _find_output_file(self, node_dir: Path) -> Optional[Path]:
        """Find the output file in a node directory."""
        if not node_dir.exists() or not node_dir.is_dir():
            return None
        for f in node_dir.iterdir():
            if f.is_file() and f.name.startswith("output."):
                return f
        return None

    def get(self, node_id: str) -> Optional[Path]:
        """
        Get cached output path for a node.

        Checks filesystem directly - no in-memory index.
        Returns the output path (a Path, not a CacheEntry) if cached,
        None otherwise. Updates hit/miss stats.
        """
        node_dir = self._node_dir(node_id)
        output_file = self._find_output_file(node_dir)
        if output_file:
            self.stats.record_hit()
            logger.debug(f"Cache hit: {node_id[:16]}...")
            return output_file
        self.stats.record_miss()
        return None

    def put(self, node_id: str, source_path: Path, node_type: str,
            execution_time: float = 0.0, move: bool = False) -> Path:
        """
        Store a file in the cache.

        Args:
            node_id: The content-addressed node ID (hash)
            source_path: Path to the file to cache
            node_type: Type of the node (for metadata)
            execution_time: How long the node took to execute
            move: If True, move the file instead of copying

        Returns:
            Path to the cached file
        """
        node_dir = self._node_dir(node_id)
        node_dir.mkdir(parents=True, exist_ok=True)
        # Preserve extension so downstream tools can sniff the format
        ext = source_path.suffix or ".out"
        output_path = node_dir / f"output{ext}"
        # Copy or move file (skip if already in place)
        source_resolved = Path(source_path).resolve()
        output_resolved = output_path.resolve()
        if source_resolved != output_resolved:
            if move:
                shutil.move(source_path, output_path)
            else:
                shutil.copy2(source_path, output_path)
        # Compute content hash (IPFS CID or local hash of the result)
        cid = _file_hash(output_path)
        # Store per-node metadata (optional, for stats/debugging)
        metadata = {
            "node_id": node_id,
            "output_path": str(output_path),
            "created_at": time.time(),
            "size_bytes": output_path.stat().st_size,
            "node_type": node_type,
            "cid": cid,
            "execution_time": execution_time,
        }
        metadata_path = node_dir / "metadata.json"
        with open(metadata_path, "w") as f:
            json.dump(metadata, f, indent=2)
        logger.debug(f"Cached: {node_id[:16]}... ({metadata['size_bytes']} bytes)")
        return output_path

    def has(self, node_id: str) -> bool:
        """Check if a node is cached (without affecting stats)."""
        return self._find_output_file(self._node_dir(node_id)) is not None

    def remove(self, node_id: str) -> bool:
        """Remove a node from the cache. Returns True if it existed."""
        node_dir = self._node_dir(node_id)
        if node_dir.exists():
            shutil.rmtree(node_dir)
            return True
        return False

    def clear(self):
        """Clear all cached entries (underscore-prefixed dirs are reserved)."""
        for node_dir in self.cache_dir.iterdir():
            if node_dir.is_dir() and not node_dir.name.startswith("_"):
                shutil.rmtree(node_dir)
        self.stats = CacheStats()

    def get_stats(self) -> CacheStats:
        """Get cache statistics (scans filesystem for sizes/counts)."""
        stats = CacheStats()
        for node_dir in self.cache_dir.iterdir():
            if node_dir.is_dir() and not node_dir.name.startswith("_"):
                output_file = self._find_output_file(node_dir)
                if output_file:
                    stats.total_entries += 1
                    stats.total_size_bytes += output_file.stat().st_size
        # Hit/miss counters live in memory, not on disk
        stats.hits = self.stats.hits
        stats.misses = self.stats.misses
        stats.hit_rate = self.stats.hit_rate
        return stats

    def list_entries(self) -> List[CacheEntry]:
        """List all cache entries (scans filesystem)."""
        entries = []
        for node_dir in self.cache_dir.iterdir():
            if node_dir.is_dir() and not node_dir.name.startswith("_"):
                entry = self._load_entry_from_disk(node_dir.name)
                if entry:
                    entries.append(entry)
        return entries

    def _load_entry_from_disk(self, node_id: str) -> Optional[CacheEntry]:
        """Load entry metadata from disk, rebuilding it if metadata is missing."""
        node_dir = self._node_dir(node_id)
        metadata_path = node_dir / "metadata.json"
        output_file = self._find_output_file(node_dir)
        if not output_file:
            return None
        if metadata_path.exists():
            try:
                with open(metadata_path) as f:
                    data = json.load(f)
                return CacheEntry.from_dict(data)
            except (json.JSONDecodeError, KeyError):
                # Corrupt/partial metadata: fall through to reconstruction
                pass
        # Fallback: create entry from filesystem
        return CacheEntry(
            node_id=node_id,
            output_path=output_file,
            created_at=output_file.stat().st_mtime,
            size_bytes=output_file.stat().st_size,
            node_type="unknown",
            cid=_file_hash(output_file),
        )

    def get_entry(self, node_id: str) -> Optional[CacheEntry]:
        """Get cache entry metadata (without affecting stats)."""
        return self._load_entry_from_disk(node_id)

    def find_by_cid(self, cid: str) -> Optional[CacheEntry]:
        """Find a cache entry by its content hash (scans filesystem)."""
        for entry in self.list_entries():
            if entry.cid == cid:
                return entry
        return None

    def prune(self, max_size_bytes: Optional[int] = None,
              max_age_seconds: Optional[float] = None) -> int:
        """
        Prune cache based on size or age.

        Args:
            max_size_bytes: Remove oldest entries until under this size
            max_age_seconds: Remove entries older than this

        Returns:
            Number of entries removed
        """
        removed = 0
        now = time.time()
        entries = self.list_entries()
        # Remove by age first
        if max_age_seconds is not None:
            for entry in entries:
                if now - entry.created_at > max_age_seconds:
                    self.remove(entry.node_id)
                    removed += 1
        # Then by size (remove oldest first)
        if max_size_bytes is not None:
            stats = self.get_stats()
            if stats.total_size_bytes > max_size_bytes:
                sorted_entries = sorted(entries, key=lambda e: e.created_at)
                total_size = stats.total_size_bytes
                for entry in sorted_entries:
                    if total_size <= max_size_bytes:
                        break
                    self.remove(entry.node_id)
                    total_size -= entry.size_bytes
                    removed += 1
        return removed

    def get_output_path(self, node_id: str, extension: str = ".mkv") -> Path:
        """Get the output path for a node (creates directory if needed)."""
        node_dir = self._node_dir(node_id)
        node_dir.mkdir(parents=True, exist_ok=True)
        return node_dir / f"output{extension}"

    # Effect storage methods

    def _effects_dir(self) -> Path:
        """Get the effects subdirectory ("_" prefix keeps it out of node scans)."""
        effects_dir = self.cache_dir / "_effects"
        effects_dir.mkdir(parents=True, exist_ok=True)
        return effects_dir

    def store_effect(self, source: str) -> str:
        """
        Store an effect in the cache.

        Args:
            source: Effect source code

        Returns:
            Content hash (cache ID) of the effect
        """
        import hashlib as _hashlib
        # Compute content hash
        cid = _hashlib.sha3_256(source.encode("utf-8")).hexdigest()
        # Try to load full metadata if effects module available
        try:
            from .effects.loader import load_effect
            loaded = load_effect(source)
            meta_dict = loaded.meta.to_dict()
            dependencies = loaded.dependencies
            requires_python = loaded.requires_python
            effect_name = loaded.meta.name
        except ImportError:
            # Fallback: store without parsed metadata.
            # BUG FIX: the log line below previously referenced `loaded.meta.name`,
            # which is unbound on this path and raised NameError; use a placeholder.
            meta_dict = {}
            dependencies = []
            requires_python = ">=3.10"
            effect_name = "<unparsed>"
        effect_dir = self._effects_dir() / cid
        effect_dir.mkdir(parents=True, exist_ok=True)
        # Store source
        source_path = effect_dir / "effect.py"
        source_path.write_text(source, encoding="utf-8")
        # Store metadata
        metadata = {
            "cid": cid,
            "meta": meta_dict,
            "dependencies": dependencies,
            "requires_python": requires_python,
            "stored_at": time.time(),
        }
        metadata_path = effect_dir / "metadata.json"
        with open(metadata_path, "w") as f:
            json.dump(metadata, f, indent=2)
        logger.info(f"Stored effect '{effect_name}' with hash {cid[:16]}...")
        return cid

    def get_effect(self, cid: str) -> Optional[str]:
        """
        Get effect source by content hash.

        Args:
            cid: SHA3-256 hash of effect source

        Returns:
            Effect source code if found, None otherwise
        """
        effect_dir = self._effects_dir() / cid
        source_path = effect_dir / "effect.py"
        if not source_path.exists():
            return None
        return source_path.read_text(encoding="utf-8")

    def get_effect_path(self, cid: str) -> Optional[Path]:
        """
        Get path to effect source file.

        Args:
            cid: SHA3-256 hash of effect source

        Returns:
            Path to effect.py if found, None otherwise
        """
        effect_dir = self._effects_dir() / cid
        source_path = effect_dir / "effect.py"
        if not source_path.exists():
            return None
        return source_path

    def get_effect_metadata(self, cid: str) -> Optional[dict]:
        """
        Get effect metadata by content hash.

        Args:
            cid: SHA3-256 hash of effect source

        Returns:
            Metadata dict if found, None otherwise
        """
        effect_dir = self._effects_dir() / cid
        metadata_path = effect_dir / "metadata.json"
        if not metadata_path.exists():
            return None
        try:
            with open(metadata_path) as f:
                return json.load(f)
        except (json.JSONDecodeError, KeyError):
            return None

    def has_effect(self, cid: str) -> bool:
        """Check if an effect is cached."""
        effect_dir = self._effects_dir() / cid
        return (effect_dir / "effect.py").exists()

    def list_effects(self) -> List[dict]:
        """List all cached effects with their metadata."""
        effects = []
        effects_dir = self._effects_dir()
        if not effects_dir.exists():
            return effects
        for effect_dir in effects_dir.iterdir():
            if effect_dir.is_dir():
                metadata = self.get_effect_metadata(effect_dir.name)
                if metadata:
                    effects.append(metadata)
        return effects

    def remove_effect(self, cid: str) -> bool:
        """Remove an effect from the cache. Returns True if it existed."""
        effect_dir = self._effects_dir() / cid
        if not effect_dir.exists():
            return False
        shutil.rmtree(effect_dir)
        logger.info(f"Removed effect {cid[:16]}...")
        return True

724
core/artdag/cli.py Normal file
View File

@@ -0,0 +1,724 @@
#!/usr/bin/env python3
"""
Art DAG CLI
Command-line interface for the 3-phase execution model:
artdag analyze - Extract features from inputs
artdag plan - Generate execution plan
artdag execute - Run the plan
artdag run-recipe - Full pipeline
Usage:
artdag analyze <recipe> -i <name>:<hash>[@<path>] [--features <list>]
artdag plan <recipe> -i <name>:<hash> [--analysis <file>]
artdag execute <plan.json> [--dry-run]
artdag run-recipe <recipe> -i <name>:<hash>[@<path>]
"""
import argparse
import json
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple
def parse_input(input_str: str) -> Tuple[str, str, Optional[str]]:
    """
    Parse a single input specification of the form ``name:hash[@path]``.

    Returns:
        (name, hash, path or None)

    Raises:
        ValueError: If the name:hash portion has no colon separator.
    """
    # Split off an optional trailing "@path" first; rsplit keeps any earlier
    # "@" characters inside the name:hash portion intact.
    if "@" in input_str:
        name_hash, path = input_str.rsplit("@", 1)
    else:
        name_hash, path = input_str, None
    if ":" not in name_hash:
        raise ValueError(f"Invalid input format: {input_str}. Expected name:hash[@path]")
    name, hash_value = name_hash.split(":", 1)
    return name, hash_value, path


def parse_inputs(input_list: List[str]) -> Tuple[Dict[str, str], Dict[str, str]]:
    """
    Parse a list of input specifications.

    Returns:
        (input_hashes, input_paths), both keyed by input name; input_paths
        only holds entries whose spec carried an "@path" suffix.
    """
    input_hashes: Dict[str, str] = {}
    input_paths: Dict[str, str] = {}
    for spec in input_list:
        name, hash_value, path = parse_input(spec)
        input_hashes[name] = hash_value
        if path:
            input_paths[name] = path
    return input_hashes, input_paths
def cmd_analyze(args):
    """Run analysis phase.

    Analyzes each ``-i name:hash[@path]`` input with the requested feature
    list and writes all results (keyed by input hash) to a JSON file.
    """
    from .analysis import Analyzer
    # Parse inputs
    input_hashes, input_paths = parse_inputs(args.input)
    # Parse features (default: everything the analyzer supports)
    features = args.features.split(",") if args.features else ["all"]
    # Create analyzer
    cache_dir = Path(args.cache_dir) if args.cache_dir else Path("./analysis_cache")
    analyzer = Analyzer(cache_dir=cache_dir)
    # Analyze each input
    results = {}
    for name, hash_value in input_hashes.items():
        # Local path is optional; the analyzer may resolve the hash itself
        path = input_paths.get(name)
        if path:
            path = Path(path)
        print(f"Analyzing {name} ({hash_value[:16]}...)...")
        result = analyzer.analyze(
            input_hash=hash_value,
            features=features,
            input_path=path,
        )
        # Results are keyed by content hash, not name, so they can be
        # matched back up in the planning phase.
        results[hash_value] = result.to_dict()
        # Print summary
        if result.audio and result.audio.beats:
            print(f" Tempo: {result.audio.beats.tempo:.1f} BPM")
            print(f" Beats: {len(result.audio.beats.beat_times)}")
        if result.video:
            print(f" Duration: {result.video.duration:.1f}s")
            print(f" Dimensions: {result.video.width}x{result.video.height}")
    # Write output
    output_path = Path(args.output) if args.output else Path("analysis.json")
    with open(output_path, "w") as f:
        json.dump(results, f, indent=2)
    print(f"\nAnalysis saved to: {output_path}")
def cmd_plan(args):
    """Run planning phase.

    Expands the recipe against the given input hashes (plus optional
    pre-computed analysis from ``cmd_analyze``) into a deterministic
    ExecutionPlan and writes it as JSON.
    """
    from .analysis import AnalysisResult
    from .planning import RecipePlanner, Recipe
    # Load recipe
    recipe = Recipe.from_file(Path(args.recipe))
    print(f"Recipe: {recipe.name} v{recipe.version}")
    # Parse inputs (paths are irrelevant at planning time, only hashes matter)
    input_hashes, _ = parse_inputs(args.input)
    # Load analysis if provided
    analysis = {}
    if args.analysis:
        with open(args.analysis, "r") as f:
            analysis_data = json.load(f)
        # Analysis JSON is keyed by input hash (see cmd_analyze)
        for hash_value, data in analysis_data.items():
            analysis[hash_value] = AnalysisResult.from_dict(data)
    # Create planner
    planner = RecipePlanner(use_tree_reduction=not args.no_tree_reduction)
    # Generate plan
    print("Generating execution plan...")
    plan = planner.plan(
        recipe=recipe,
        input_hashes=input_hashes,
        analysis=analysis,
    )
    # Print summary: steps grouped by dependency level (same-level steps
    # have no dependencies on each other and can run in parallel)
    print(f"\nPlan ID: {plan.plan_id[:16]}...")
    print(f"Steps: {len(plan.steps)}")
    steps_by_level = plan.get_steps_by_level()
    max_level = max(steps_by_level.keys()) if steps_by_level else 0
    print(f"Levels: {max_level + 1}")
    for level in sorted(steps_by_level.keys()):
        steps = steps_by_level[level]
        print(f" Level {level}: {len(steps)} steps (parallel)")
    # Write output
    output_path = Path(args.output) if args.output else Path("plan.json")
    with open(output_path, "w") as f:
        f.write(plan.to_json())
    print(f"\nPlan saved to: {output_path}")
def cmd_execute(args):
    """Run execution phase.

    Loads an ExecutionPlan from JSON, then either reports cache status
    (``--dry-run``) or executes each step level-by-level, storing every
    output in the content-addressed cache.
    """
    from .planning import ExecutionPlan
    from .cache import Cache
    from .executor import get_executor
    from .dag import NodeType
    from . import nodes  # Register built-in executors
    # Load plan
    with open(args.plan, "r") as f:
        plan = ExecutionPlan.from_json(f.read())
    print(f"Executing plan: {plan.plan_id[:16]}...")
    print(f"Steps: {len(plan.steps)}")
    # One cache instance for both dry-run inspection and real execution
    cache = Cache(Path(args.cache_dir) if args.cache_dir else Path("./cache"))
    steps_by_level = plan.get_steps_by_level()
    if args.dry_run:
        print("\n=== DRY RUN ===")
        # Just report which steps are already cached vs. pending
        cached_count = 0
        pending_count = 0
        for level in sorted(steps_by_level.keys()):
            steps = steps_by_level[level]
            print(f"\nLevel {level}:")
            for step in steps:
                if cache.has(step.cache_id):
                    print(f" [CACHED] {step.step_id}: {step.node_type}")
                    cached_count += 1
                else:
                    print(f" [PENDING] {step.step_id}: {step.node_type}")
                    pending_count += 1
        print(f"\nSummary: {cached_count} cached, {pending_count} pending")
        return
    # Execute locally (for testing - production uses Celery)
    cache_paths = {}
    for hash_value in plan.input_hashes.values():
        # BUG FIX: Cache.get() returns the cached output Path (or None),
        # not a CacheEntry; the original called `.output_path` on it and
        # raised AttributeError whenever an input was already cached.
        cached_input = cache.get(hash_value)
        if cached_input:
            cache_paths[hash_value] = str(cached_input)
    executed = 0
    cached = 0
    for level in sorted(steps_by_level.keys()):
        steps = steps_by_level[level]
        print(f"\nLevel {level}: {len(steps)} steps")
        for step in steps:
            if cache.has(step.cache_id):
                cached_path = cache.get(step.cache_id)
                # Record under both cache_id and step_id so later steps can
                # resolve inputs by either identifier.
                cache_paths[step.cache_id] = str(cached_path)
                cache_paths[step.step_id] = str(cached_path)
                print(f" [CACHED] {step.step_id}")
                cached += 1
                continue
            print(f" [RUNNING] {step.step_id}: {step.node_type}...")
            # Get executor (fall back to the raw string for custom node types)
            try:
                node_type = NodeType[step.node_type]
            except KeyError:
                node_type = step.node_type
            executor = get_executor(node_type)
            if executor is None:
                print(f" ERROR: No executor for {step.node_type}")
                continue
            # Resolve inputs by step_id first, then by the producing step's cache_id
            input_paths = []
            for input_id in step.input_steps:
                if input_id in cache_paths:
                    input_paths.append(Path(cache_paths[input_id]))
                else:
                    input_step = plan.get_step(input_id)
                    if input_step and input_step.cache_id in cache_paths:
                        input_paths.append(Path(cache_paths[input_step.cache_id]))
            if len(input_paths) != len(step.input_steps):
                print(" ERROR: Missing inputs")
                continue
            # Execute and store the result under the step's cache_id
            output_path = cache.get_output_path(step.cache_id)
            try:
                result_path = executor.execute(step.config, input_paths, output_path)
                cache.put(step.cache_id, result_path, node_type=step.node_type)
                cache_paths[step.cache_id] = str(result_path)
                cache_paths[step.step_id] = str(result_path)
                print(f" [DONE] -> {result_path}")
                executed += 1
            except Exception as e:
                # A failed step is reported but does not abort the plan
                print(f" [FAILED] {e}")
    # Final output
    output_step = plan.get_step(plan.output_step)
    output_path = cache_paths.get(output_step.cache_id) if output_step else None
    print(f"\n=== Complete ===")
    print(f"Cached: {cached}")
    print(f"Executed: {executed}")
    if output_path:
        print(f"Output: {output_path}")
def cmd_run_recipe(args):
    """Run complete pipeline: analyze → plan → execute."""
    from .analysis import Analyzer, AnalysisResult  # AnalysisResult unused here; kept for parity with cmd_plan
    from .planning import RecipePlanner, Recipe
    from .cache import Cache
    from .executor import get_executor
    from .dag import NodeType
    from . import nodes  # Register built-in executors
    # Load recipe
    recipe = Recipe.from_file(Path(args.recipe))
    print(f"Recipe: {recipe.name} v{recipe.version}")
    # Parse inputs
    input_hashes, input_paths = parse_inputs(args.input)
    # Parse features
    features = args.features.split(",") if args.features else ["beats", "energy"]
    cache_dir = Path(args.cache_dir) if args.cache_dir else Path("./cache")
    # Phase 1: Analyze
    print("\n=== Phase 1: Analysis ===")
    analyzer = Analyzer(cache_dir=cache_dir / "analysis")
    analysis = {}
    for name, hash_value in input_hashes.items():
        path = input_paths.get(name)
        if path:
            path = Path(path)
        print(f"Analyzing {name}...")
        result = analyzer.analyze(
            input_hash=hash_value,
            features=features,
            input_path=path,
        )
        # Keyed by content hash so the planner can match analysis to inputs
        analysis[hash_value] = result
        if result.audio and result.audio.beats:
            print(f" Tempo: {result.audio.beats.tempo:.1f} BPM, {len(result.audio.beats.beat_times)} beats")
    # Phase 2: Plan
    print("\n=== Phase 2: Planning ===")
    # Check for cached plan
    plans_dir = cache_dir / "plans"
    plans_dir.mkdir(parents=True, exist_ok=True)
    # Generate plan to get plan_id (deterministic hash) -- planning always
    # runs, but a previously saved plan with the same ID is reloaded so the
    # executed plan is byte-identical across runs.
    planner = RecipePlanner(use_tree_reduction=True)
    plan = planner.plan(
        recipe=recipe,
        input_hashes=input_hashes,
        analysis=analysis,
    )
    plan_cache_path = plans_dir / f"{plan.plan_id}.json"
    if plan_cache_path.exists():
        print(f"Plan cached: {plan.plan_id[:16]}...")
        from .planning import ExecutionPlan
        with open(plan_cache_path, "r") as f:
            plan = ExecutionPlan.from_json(f.read())
    else:
        # Save plan to cache
        with open(plan_cache_path, "w") as f:
            f.write(plan.to_json())
        print(f"Plan saved: {plan.plan_id[:16]}...")
    print(f"Plan: {len(plan.steps)} steps")
    steps_by_level = plan.get_steps_by_level()
    print(f"Levels: {len(steps_by_level)}")
    # Phase 3: Execute
    print("\n=== Phase 3: Execution ===")
    cache = Cache(cache_dir)
    # Build initial cache paths: inputs are addressable by both hash and name
    cache_paths = {}
    for name, hash_value in input_hashes.items():
        path = input_paths.get(name)
        if path:
            cache_paths[hash_value] = path
            cache_paths[name] = path
    executed = 0
    cached = 0
    for level in sorted(steps_by_level.keys()):
        steps = steps_by_level[level]
        print(f"\nLevel {level}: {len(steps)} steps")
        for step in steps:
            if cache.has(step.cache_id):
                cached_path = cache.get(step.cache_id)
                cache_paths[step.cache_id] = str(cached_path)
                cache_paths[step.step_id] = str(cached_path)
                print(f" [CACHED] {step.step_id}")
                cached += 1
                continue
            # Handle SOURCE specially: just alias the already-known input path
            if step.node_type == "SOURCE":
                cid = step.config.get("cid")
                if cid in cache_paths:
                    cache_paths[step.cache_id] = cache_paths[cid]
                    cache_paths[step.step_id] = cache_paths[cid]
                print(f" [SOURCE] {step.step_id}")
                continue
            print(f" [RUNNING] {step.step_id}: {step.node_type}...")
            # Fall back to the raw string for custom node types
            try:
                node_type = NodeType[step.node_type]
            except KeyError:
                node_type = step.node_type
            executor = get_executor(node_type)
            if executor is None:
                print(f" SKIP: No executor for {step.node_type}")
                continue
            # Resolve inputs by step_id first, then by producing step's cache_id
            input_paths_list = []
            for input_id in step.input_steps:
                if input_id in cache_paths:
                    input_paths_list.append(Path(cache_paths[input_id]))
                else:
                    input_step = plan.get_step(input_id)
                    if input_step and input_step.cache_id in cache_paths:
                        input_paths_list.append(Path(cache_paths[input_step.cache_id]))
            if len(input_paths_list) != len(step.input_steps):
                print(f" ERROR: Missing inputs for {step.step_id}")
                continue
            output_path = cache.get_output_path(step.cache_id)
            try:
                result_path = executor.execute(step.config, input_paths_list, output_path)
                cache.put(step.cache_id, result_path, node_type=step.node_type)
                cache_paths[step.cache_id] = str(result_path)
                cache_paths[step.step_id] = str(result_path)
                print(f" [DONE]")
                executed += 1
            except Exception as e:
                # A failed step is reported but does not abort the run
                print(f" [FAILED] {e}")
    # Final output
    output_step = plan.get_step(plan.output_step)
    output_path = cache_paths.get(output_step.cache_id) if output_step else None
    print(f"\n=== Complete ===")
    print(f"Cached: {cached}")
    print(f"Executed: {executed}")
    if output_path:
        print(f"Output: {output_path}")
def cmd_run_recipe_ipfs(args):
    """Run complete pipeline with IPFS-primary mode.
    Everything stored on IPFS:
    - Inputs (media files)
    - Analysis results (JSON)
    - Execution plans (JSON)
    - Step outputs (media files)
    """
    import hashlib  # NOTE(review): appears unused in this function
    import shutil
    import tempfile
    from .analysis import Analyzer, AnalysisResult
    from .planning import RecipePlanner, Recipe, ExecutionPlan
    from .executor import get_executor
    from .dag import NodeType
    from . import nodes  # Register built-in executors
    # Check for ipfs_client
    try:
        from art_celery import ipfs_client
    except ImportError:
        # Try relative import for when running from art-celery
        try:
            import ipfs_client
        except ImportError:
            print("Error: ipfs_client not available. Install art-celery or run from art-celery directory.")
            sys.exit(1)
    # Check IPFS availability
    if not ipfs_client.is_available():
        print("Error: IPFS daemon not available. Start IPFS with 'ipfs daemon'")
        sys.exit(1)
    print("=== IPFS-Primary Mode ===")
    print(f"IPFS Node: {ipfs_client.get_node_id()[:16]}...")
    # Load recipe
    recipe_path = Path(args.recipe)
    recipe = Recipe.from_file(recipe_path)
    print(f"\nRecipe: {recipe.name} v{recipe.version}")
    # Parse inputs
    input_hashes, input_paths = parse_inputs(args.input)
    # Parse features
    features = args.features.split(",") if args.features else ["beats", "energy"]
    # Phase 0: Register on IPFS
    print("\n=== Phase 0: Register on IPFS ===")
    # Register recipe
    recipe_bytes = recipe_path.read_bytes()
    recipe_cid = ipfs_client.add_bytes(recipe_bytes)
    print(f"Recipe CID: {recipe_cid}")
    # Register inputs (inputs without a local @path are silently skipped)
    input_cids = {}
    for name, hash_value in input_hashes.items():
        path = input_paths.get(name)
        if path:
            cid = ipfs_client.add_file(Path(path))
            if cid:
                input_cids[name] = cid
                print(f"Input '{name}': {cid}")
            else:
                print(f"Error: Failed to add input '{name}' to IPFS")
                sys.exit(1)
    # Phase 1: Analyze
    print("\n=== Phase 1: Analysis ===")
    # Create temp dir for analysis; cleaned up in the finally below
    work_dir = Path(tempfile.mkdtemp(prefix="artdag_ipfs_"))
    analysis_cids = {}
    analysis = {}
    try:
        for name, hash_value in input_hashes.items():
            input_cid = input_cids.get(name)
            if not input_cid:
                continue
            print(f"Analyzing {name}...")
            # Fetch from IPFS to temp
            temp_input = work_dir / f"input_{name}.mkv"
            if not ipfs_client.get_file(input_cid, temp_input):
                print(f" Error: Failed to fetch from IPFS")
                continue
            # Run analysis (no local analysis cache in IPFS-primary mode)
            analyzer = Analyzer(cache_dir=None)
            result = analyzer.analyze(
                input_hash=hash_value,
                features=features,
                input_path=temp_input,
            )
            if result.audio and result.audio.beats:
                print(f" Tempo: {result.audio.beats.tempo:.1f} BPM, {len(result.audio.beats.beat_times)} beats")
            # Store analysis on IPFS
            analysis_cid = ipfs_client.add_json(result.to_dict())
            if analysis_cid:
                analysis_cids[hash_value] = analysis_cid
                analysis[hash_value] = result
                print(f" Analysis CID: {analysis_cid}")
        # Phase 2: Plan
        print("\n=== Phase 2: Planning ===")
        planner = RecipePlanner(use_tree_reduction=True)
        plan = planner.plan(
            recipe=recipe,
            input_hashes=input_hashes,
            analysis=analysis if analysis else None,
        )
        # Store plan on IPFS
        import json  # NOTE(review): shadows the module-level import; harmless
        plan_dict = json.loads(plan.to_json())
        plan_cid = ipfs_client.add_json(plan_dict)
        print(f"Plan ID: {plan.plan_id[:16]}...")
        print(f"Plan CID: {plan_cid}")
        print(f"Steps: {len(plan.steps)}")
        steps_by_level = plan.get_steps_by_level()
        print(f"Levels: {len(steps_by_level)}")
        # Phase 3: Execute
        print("\n=== Phase 3: Execution ===")
        # CID results: inputs by name, step outputs by step_id
        cid_results = dict(input_cids)
        step_cids = {}
        executed = 0
        cached = 0
        for level in sorted(steps_by_level.keys()):
            steps = steps_by_level[level]
            print(f"\nLevel {level}: {len(steps)} steps")
            for step in steps:
                # Handle SOURCE: alias the registered input CID
                if step.node_type == "SOURCE":
                    source_name = step.config.get("name") or step.step_id
                    cid = cid_results.get(source_name)
                    if cid:
                        step_cids[step.step_id] = cid
                    print(f" [SOURCE] {step.step_id}")
                    continue
                print(f" [RUNNING] {step.step_id}: {step.node_type}...")
                # Fall back to the raw string for custom node types
                try:
                    node_type = NodeType[step.node_type]
                except KeyError:
                    node_type = step.node_type
                executor = get_executor(node_type)
                if executor is None:
                    print(f" SKIP: No executor for {step.node_type}")
                    continue
                # Fetch inputs from IPFS into the temp work dir
                input_paths_list = []
                for i, input_step_id in enumerate(step.input_steps):
                    input_cid = step_cids.get(input_step_id) or cid_results.get(input_step_id)
                    if not input_cid:
                        print(f" ERROR: Missing input CID for {input_step_id}")
                        continue
                    temp_path = work_dir / f"step_{step.step_id}_input_{i}.mkv"
                    if not ipfs_client.get_file(input_cid, temp_path):
                        print(f" ERROR: Failed to fetch {input_cid}")
                        continue
                    input_paths_list.append(temp_path)
                # Any missing fetch above leaves the list short; skip the step
                if len(input_paths_list) != len(step.input_steps):
                    print(f" ERROR: Missing inputs")
                    continue
                # Execute
                output_path = work_dir / f"step_{step.step_id}_output.mkv"
                try:
                    result_path = executor.execute(step.config, input_paths_list, output_path)
                    # Add to IPFS
                    output_cid = ipfs_client.add_file(result_path)
                    if output_cid:
                        step_cids[step.step_id] = output_cid
                        print(f" [DONE] CID: {output_cid}")
                        executed += 1
                    else:
                        print(f" [FAILED] Could not add to IPFS")
                except Exception as e:
                    print(f" [FAILED] {e}")
        # Final output
        output_step = plan.get_step(plan.output_step)
        output_cid = step_cids.get(output_step.step_id) if output_step else None
        print(f"\n=== Complete ===")
        print(f"Executed: {executed}")
        if output_cid:
            print(f"Output CID: {output_cid}")
            print(f"Fetch with: ipfs get {output_cid}")
        # Summary of all CIDs
        print(f"\n=== All CIDs ===")
        print(f"Recipe: {recipe_cid}")
        print(f"Plan: {plan_cid}")
        for name, cid in input_cids.items():
            print(f"Input '{name}': {cid}")
        for hash_val, cid in analysis_cids.items():
            print(f"Analysis '{hash_val[:16]}...': {cid}")
        if output_cid:
            print(f"Output: {output_cid}")
    finally:
        # Cleanup temp
        shutil.rmtree(work_dir, ignore_errors=True)
def main():
    """CLI entry point: build the argument parser and dispatch to a cmd_* handler."""
    parser = argparse.ArgumentParser(
        prog="artdag",
        description="Art DAG - Declarative media composition",
    )
    subparsers = parser.add_subparsers(dest="command", help="Commands")

    # analyze command
    p_analyze = subparsers.add_parser("analyze", help="Extract features from inputs")
    p_analyze.add_argument("recipe", help="Recipe YAML file")
    p_analyze.add_argument("-i", "--input", action="append", required=True,
                           help="Input: name:hash[@path]")
    p_analyze.add_argument("--features", help="Features to extract (comma-separated)")
    p_analyze.add_argument("-o", "--output", help="Output file (default: analysis.json)")
    p_analyze.add_argument("--cache-dir", help="Analysis cache directory")

    # plan command
    p_plan = subparsers.add_parser("plan", help="Generate execution plan")
    p_plan.add_argument("recipe", help="Recipe YAML file")
    p_plan.add_argument("-i", "--input", action="append", required=True,
                        help="Input: name:hash")
    p_plan.add_argument("--analysis", help="Analysis JSON file")
    p_plan.add_argument("-o", "--output", help="Output file (default: plan.json)")
    p_plan.add_argument("--no-tree-reduction", action="store_true",
                        help="Disable tree reduction optimization")

    # execute command
    p_execute = subparsers.add_parser("execute", help="Execute a plan")
    p_execute.add_argument("plan", help="Plan JSON file")
    p_execute.add_argument("--dry-run", action="store_true",
                           help="Show what would execute")
    p_execute.add_argument("--cache-dir", help="Cache directory")

    # run-recipe command
    p_run = subparsers.add_parser("run-recipe", help="Full pipeline: analyze → plan → execute")
    p_run.add_argument("recipe", help="Recipe YAML file")
    p_run.add_argument("-i", "--input", action="append", required=True,
                       help="Input: name:hash[@path]")
    p_run.add_argument("--features", help="Features to extract (comma-separated)")
    p_run.add_argument("--cache-dir", help="Cache directory")
    p_run.add_argument("--ipfs-primary", action="store_true",
                       help="Use IPFS-primary mode (everything on IPFS, no local cache)")

    args = parser.parse_args()
    # run-recipe has two backends selected by --ipfs-primary
    if args.command == "run-recipe":
        if getattr(args, 'ipfs_primary', False):
            cmd_run_recipe_ipfs(args)
        else:
            cmd_run_recipe(args)
        return
    dispatch = {
        "analyze": cmd_analyze,
        "plan": cmd_plan,
        "execute": cmd_execute,
    }
    handler = dispatch.get(args.command)
    if handler is None:
        # No (or unknown) subcommand: show usage and exit non-zero
        parser.print_help()
        sys.exit(1)
    handler(args)
# Script entry point when run directly (e.g. `python -m artdag.cli`).
if __name__ == "__main__":
    main()

201
core/artdag/client.py Normal file
View File

@@ -0,0 +1,201 @@
# primitive/client.py
"""
Client SDK for the primitive execution server.
Provides a simple API for submitting DAGs and retrieving results.
Usage:
client = PrimitiveClient("http://localhost:8080")
# Build a DAG
builder = DAGBuilder()
source = builder.source("/path/to/video.mp4")
segment = builder.segment(source, duration=5.0)
builder.set_output(segment)
dag = builder.build()
# Execute and wait for result
result = client.execute(dag)
print(f"Output: {result.output_path}")
"""
import json
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from urllib.request import urlopen, Request
from urllib.error import HTTPError, URLError
from .dag import DAG, DAGBuilder
@dataclass
class ExecutionResult:
    """Result from server execution.

    Returned by PrimitiveClient.result()/wait()/execute(); fields are
    populated from the server's /result/{job_id} JSON payload.
    """
    success: bool  # True when the job completed without error
    output_path: Optional[Path] = None  # path to the produced output, if any
    error: Optional[str] = None  # server-side error message on failure
    execution_time: float = 0.0  # execution time as reported by the server (presumably seconds)
    nodes_executed: int = 0  # DAG nodes actually executed
    nodes_cached: int = 0  # DAG nodes satisfied from the cache
@dataclass
class CacheStats:
    """Cache statistics from server.

    Populated by PrimitiveClient.cache_stats() from the /cache/stats endpoint.
    """
    total_entries: int = 0  # number of cached artifacts
    total_size_bytes: int = 0  # total cache size in bytes
    hits: int = 0  # cache hits since server start (presumably; confirm server semantics)
    misses: int = 0  # cache misses
    hit_rate: float = 0.0  # hits / (hits + misses), as reported by the server
class PrimitiveClient:
    """
    Client for the primitive execution server.

    Wraps the server's small HTTP/JSON API: submit a DAG, poll its status,
    fetch the result, and inspect/clear the cache.

    Args:
        base_url: Server URL (e.g., "http://localhost:8080")
        timeout: Request timeout in seconds
    """
    def __init__(self, base_url: str = "http://localhost:8080", timeout: float = 300):
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def _request(self, method: str, path: str, data: Optional[dict] = None) -> dict:
        """Make an HTTP request to the server and decode the JSON response.

        Args:
            method: HTTP verb ("GET", "POST", "DELETE", ...)
            path: URL path starting with "/"
            data: Optional JSON-serializable request body

        Returns:
            Decoded JSON response body.

        Raises:
            RuntimeError: on HTTP errors (uses the server's "error" field
                when the error body is JSON).
            ConnectionError: when the server is unreachable.
        """
        url = f"{self.base_url}{path}"
        if data is not None:
            body = json.dumps(data).encode()
            headers = {"Content-Type": "application/json"}
        else:
            body = None
            headers = {}
        req = Request(url, data=body, headers=headers, method=method)
        try:
            with urlopen(req, timeout=self.timeout) as response:
                return json.loads(response.read().decode())
        except HTTPError as e:
            error_body = e.read().decode()
            # Prefer the structured "error" field when the server sent JSON;
            # fall back to the raw body for non-JSON error pages.
            try:
                message = json.loads(error_body).get("error", str(e))
            except json.JSONDecodeError:
                message = f"HTTP {e.code}: {error_body}"
            raise RuntimeError(message) from e
        except URLError as e:
            raise ConnectionError(f"Failed to connect to server: {e}") from e

    def health(self) -> bool:
        """Check if server is healthy (GET /health reports status "ok")."""
        try:
            result = self._request("GET", "/health")
            return result.get("status") == "ok"
        except Exception:
            # Any failure (unreachable, bad response, ...) counts as unhealthy.
            return False

    def submit(self, dag: "DAG") -> str:
        """
        Submit a DAG for execution.

        Args:
            dag: The DAG to execute

        Returns:
            Job ID for tracking
        """
        result = self._request("POST", "/execute", dag.to_dict())
        return result["job_id"]

    def status(self, job_id: str) -> str:
        """
        Get job status.

        Args:
            job_id: Job ID from submit()

        Returns:
            Status: "pending", "running", "completed", or "failed"
        """
        result = self._request("GET", f"/status/{job_id}")
        return result["status"]

    def result(self, job_id: str) -> Optional["ExecutionResult"]:
        """
        Get job result (non-blocking).

        Args:
            job_id: Job ID from submit()

        Returns:
            ExecutionResult if complete, None if still running
        """
        data = self._request("GET", f"/result/{job_id}")
        if not data.get("ready", False):
            return None
        return ExecutionResult(
            success=data.get("success", False),
            output_path=Path(data["output_path"]) if data.get("output_path") else None,
            error=data.get("error"),
            execution_time=data.get("execution_time", 0),
            nodes_executed=data.get("nodes_executed", 0),
            nodes_cached=data.get("nodes_cached", 0),
        )

    def wait(self, job_id: str, poll_interval: float = 0.5,
             timeout: Optional[float] = None) -> "ExecutionResult":
        """
        Wait for job completion and return result.

        Args:
            job_id: Job ID from submit()
            poll_interval: Seconds between status checks
            timeout: Optional maximum seconds to wait; None (the default,
                preserving previous behavior) waits indefinitely.

        Returns:
            ExecutionResult

        Raises:
            TimeoutError: if timeout elapses before the job completes.
        """
        deadline = None if timeout is None else time.monotonic() + timeout
        while True:
            result = self.result(job_id)
            if result is not None:
                return result
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(f"Job {job_id} did not complete within {timeout}s")
            time.sleep(poll_interval)

    def execute(self, dag: "DAG", poll_interval: float = 0.5) -> "ExecutionResult":
        """
        Submit DAG and wait for result.

        Convenience method combining submit() and wait().

        Args:
            dag: The DAG to execute
            poll_interval: Seconds between status checks

        Returns:
            ExecutionResult
        """
        job_id = self.submit(dag)
        return self.wait(job_id, poll_interval)

    def cache_stats(self) -> "CacheStats":
        """Get cache statistics."""
        data = self._request("GET", "/cache/stats")
        return CacheStats(
            total_entries=data.get("total_entries", 0),
            total_size_bytes=data.get("total_size_bytes", 0),
            hits=data.get("hits", 0),
            misses=data.get("misses", 0),
            hit_rate=data.get("hit_rate", 0.0),
        )

    def clear_cache(self) -> None:
        """Clear the server cache."""
        self._request("DELETE", "/cache")
# Re-export DAGBuilder for convenience, so callers can build and submit a
# DAG with a single import from this module.
__all__ = ["PrimitiveClient", "ExecutionResult", "CacheStats", "DAGBuilder"]

344
core/artdag/dag.py Normal file
View File

@@ -0,0 +1,344 @@
# primitive/dag.py
"""
Core DAG data structures.
Nodes are content-addressed: node_id = hash(type + config + input_ids)
This enables automatic caching and deduplication.
"""
import hashlib
import json
from dataclasses import dataclass, field
from enum import Enum, auto
from typing import Any, Dict, List, Optional
class NodeType(Enum):
    """Built-in node types.

    Values are assigned by auto() and therefore depend on declaration
    order; serialization uses the member *name* (see Node.to_dict), so
    the numeric values never leave the process.
    """
    # Source operations
    SOURCE = auto()  # Load file from path
    # Transform operations
    SEGMENT = auto()  # Extract time range
    RESIZE = auto()  # Scale/crop/pad
    TRANSFORM = auto()  # Visual effects (color, blur, etc.)
    # Compose operations
    SEQUENCE = auto()  # Concatenate in time
    LAYER = auto()  # Stack spatially (overlay)
    MUX = auto()  # Combine video + audio streams
    BLEND = auto()  # Blend two inputs
    AUDIO_MIX = auto()  # Mix multiple audio streams
    SWITCH = auto()  # Time-based input switching
    # Analysis operations
    ANALYZE = auto()  # Extract features (audio, motion, etc.)
    # Generation operations
    GENERATE = auto()  # Create content (text, graphics, etc.)
def _stable_hash(data: Any, algorithm: str = "sha3_256") -> str:
    """
    Create a stable hash from arbitrary JSON-serializable data.

    The data is canonicalized via JSON with sorted keys and compact
    separators so equal structures always hash identically. Uses SHA-3
    (Keccak) by default for quantum resistance; the full digest is
    returned with no truncation.

    Args:
        data: Data to hash (will be JSON serialized)
        algorithm: Hash algorithm (default: sha3_256)

    Returns:
        Full hex digest
    """
    canonical = json.dumps(data, sort_keys=True, separators=(",", ":")).encode()
    digest = hashlib.new(algorithm)
    digest.update(canonical)
    return digest.hexdigest()
@dataclass
class Node:
    """
    A single operation in the execution DAG.

    Attributes:
        node_type: The operation type (NodeType enum or string for custom types)
        config: Operation-specific configuration
        inputs: List of input node IDs (resolved during execution)
        node_id: Content-addressed ID (computed from type + config + inputs)
        name: Optional human-readable name for debugging
    """
    node_type: NodeType | str
    config: Dict[str, Any] = field(default_factory=dict)
    inputs: List[str] = field(default_factory=list)
    node_id: Optional[str] = None
    name: Optional[str] = None

    def __post_init__(self):
        """Fill in the content-addressed ID when the caller did not supply one."""
        if self.node_id is None:
            self.node_id = self._compute_id()

    def _type_name(self) -> str:
        """Canonical string form of node_type (enum member name, or the raw string)."""
        if isinstance(self.node_type, NodeType):
            return self.node_type.name
        return str(self.node_type)

    def _compute_id(self) -> str:
        """Hash type + config + (sorted) inputs into a stable content address."""
        return _stable_hash({
            "type": self._type_name(),
            "config": self.config,
            "inputs": sorted(self.inputs),  # sort for stability
        })

    def to_dict(self) -> Dict[str, Any]:
        """Serialize node to dictionary."""
        return {
            "node_id": self.node_id,
            "node_type": self._type_name(),
            "config": self.config,
            "inputs": self.inputs,
            "name": self.name,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Node":
        """Deserialize node from dictionary."""
        raw_type = data["node_type"]
        # Known built-in names become NodeType members; anything else stays
        # a custom string type.
        node_type = NodeType[raw_type] if raw_type in NodeType.__members__ else raw_type
        return cls(
            node_type=node_type,
            config=data.get("config", {}),
            inputs=data.get("inputs", []),
            node_id=data.get("node_id"),
            name=data.get("name"),
        )
@dataclass
class DAG:
    """
    A directed acyclic graph of nodes.

    Attributes:
        nodes: Dictionary mapping node_id -> Node
        output_id: The ID of the final output node
        metadata: Optional metadata about the DAG (source, version, etc.)
    """
    nodes: Dict[str, "Node"] = field(default_factory=dict)
    output_id: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def add_node(self, node: "Node") -> str:
        """Add a node to the DAG, returning its ID.

        Content addressing makes this idempotent: re-adding a node with an
        existing ID is a no-op (deduplication).
        """
        if node.node_id in self.nodes:
            # Node already exists (deduplication via content addressing)
            return node.node_id
        self.nodes[node.node_id] = node
        return node.node_id

    def set_output(self, node_id: str) -> None:
        """Set the output node.

        Raises:
            ValueError: if node_id has not been added to the DAG.
        """
        if node_id not in self.nodes:
            raise ValueError(f"Node {node_id} not in DAG")
        self.output_id = node_id

    def get_node(self, node_id: str) -> "Node":
        """Get a node by ID.

        Raises:
            KeyError: if the node is unknown.
        """
        if node_id not in self.nodes:
            raise KeyError(f"Node {node_id} not found")
        return self.nodes[node_id]

    def topological_order(self) -> List[str]:
        """Return nodes in topological order (dependencies first).

        Uses a DFS with an explicit in-progress set so cycles are detected
        (the previous implementation marked nodes visited before recursing,
        so a cycle silently produced a non-topological order).

        Raises:
            ValueError: if the graph contains a cycle.
            KeyError: if a node references an input not in the DAG.
        """
        done = set()          # fully processed nodes
        in_progress = set()   # nodes on the current DFS path (cycle detector)
        order: List[str] = []

        def visit(node_id: str):
            if node_id in done:
                return
            if node_id in in_progress:
                # Back-edge: this node is its own (transitive) dependency.
                raise ValueError(f"Cycle detected at node {node_id}")
            in_progress.add(node_id)
            for input_id in self.nodes[node_id].inputs:
                visit(input_id)
            in_progress.discard(node_id)
            done.add(node_id)
            order.append(node_id)

        # Visit all nodes (not just output, in case of disconnected components)
        for node_id in self.nodes:
            visit(node_id)
        return order

    def validate(self) -> List[str]:
        """Validate DAG structure. Returns list of errors (empty if valid)."""
        errors = []
        if self.output_id is None:
            errors.append("No output node set")
        elif self.output_id not in self.nodes:
            errors.append(f"Output node {self.output_id} not in DAG")
        # Check all input references are valid
        for node_id, node in self.nodes.items():
            for input_id in node.inputs:
                if input_id not in self.nodes:
                    errors.append(f"Node {node_id} references missing input {input_id}")
        # Check for cycles (skip if we already found missing inputs)
        if not any("missing" in e for e in errors):
            try:
                self.topological_order()
            except (RecursionError, KeyError, ValueError):
                # ValueError is raised by topological_order's cycle detection.
                errors.append("DAG contains cycles or invalid references")
        return errors

    def to_dict(self) -> Dict[str, Any]:
        """Serialize DAG to dictionary."""
        return {
            "nodes": {nid: node.to_dict() for nid, node in self.nodes.items()},
            "output_id": self.output_id,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "DAG":
        """Deserialize DAG from dictionary."""
        dag = cls(metadata=data.get("metadata", {}))
        for node_data in data.get("nodes", {}).values():
            dag.add_node(Node.from_dict(node_data))
        dag.output_id = data.get("output_id")
        return dag

    def to_json(self) -> str:
        """Serialize DAG to JSON string."""
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "DAG":
        """Deserialize DAG from JSON string."""
        return cls.from_dict(json.loads(json_str))
class DAGBuilder:
    """
    Fluent builder for constructing DAGs.

    Each builder method adds one content-addressed node and returns its ID,
    so results can be fed as inputs into later operations.

    Example:
        builder = DAGBuilder()
        source = builder.source("/path/to/video.mp4")
        segment = builder.segment(source, duration=5.0)
        builder.set_output(segment)
        dag = builder.build()
    """
    def __init__(self):
        self.dag = DAG()

    def _add(self, node_type: NodeType | str, config: Dict[str, Any],
             inputs: Optional[List[str]] = None, name: Optional[str] = None) -> str:
        """Create a node, add it to the DAG, and return its content ID."""
        node = Node(
            node_type=node_type,
            config=config,
            inputs=inputs or [],
            name=name,
        )
        return self.dag.add_node(node)

    # Source operations
    def source(self, path: str, name: Optional[str] = None) -> str:
        """Add a SOURCE node (load a file from path)."""
        return self._add(NodeType.SOURCE, {"path": path}, name=name)

    # Transform operations
    def segment(self, input_id: str, duration: Optional[float] = None,
                offset: float = 0, precise: bool = True, name: Optional[str] = None) -> str:
        """Add a SEGMENT node (extract a time range starting at offset)."""
        config = {"offset": offset, "precise": precise}
        if duration is not None:
            config["duration"] = duration
        return self._add(NodeType.SEGMENT, config, [input_id], name=name)

    def resize(self, input_id: str, width: int, height: int,
               mode: str = "fit", name: Optional[str] = None) -> str:
        """Add a RESIZE node (scale/crop/pad to width x height)."""
        return self._add(
            NodeType.RESIZE,
            {"width": width, "height": height, "mode": mode},
            [input_id],
            name=name
        )

    def transform(self, input_id: str, effects: Dict[str, Any],
                  name: Optional[str] = None) -> str:
        """Add a TRANSFORM node (visual effects)."""
        return self._add(NodeType.TRANSFORM, {"effects": effects}, [input_id], name=name)

    # Compose operations
    def sequence(self, input_ids: List[str], transition: Optional[Dict[str, Any]] = None,
                 name: Optional[str] = None) -> str:
        """Add a SEQUENCE node (concatenate in time; default transition is a cut)."""
        config = {"transition": transition or {"type": "cut"}}
        return self._add(NodeType.SEQUENCE, config, input_ids, name=name)

    def layer(self, input_ids: List[str], configs: Optional[List[Dict]] = None,
              name: Optional[str] = None) -> str:
        """Add a LAYER node (stack inputs spatially).

        Args:
            input_ids: Nodes to overlay.
            configs: Optional per-input layer configs; defaults to one empty
                dict per input.
        """
        # Use a comprehension, not [{}] * n: the multiplication form would
        # alias ONE dict n times, so mutating any layer config would
        # silently change all of them.
        return self._add(
            NodeType.LAYER,
            {"inputs": configs or [{} for _ in input_ids]},
            input_ids,
            name=name
        )

    def mux(self, video_id: str, audio_id: str, shortest: bool = True,
            name: Optional[str] = None) -> str:
        """Add a MUX node (combine a video stream with an audio stream)."""
        return self._add(
            NodeType.MUX,
            {"video_stream": 0, "audio_stream": 1, "shortest": shortest},
            [video_id, audio_id],
            name=name
        )

    def blend(self, input1_id: str, input2_id: str, mode: str = "overlay",
              opacity: float = 0.5, name: Optional[str] = None) -> str:
        """Add a BLEND node (blend two inputs with the given mode/opacity)."""
        return self._add(
            NodeType.BLEND,
            {"mode": mode, "opacity": opacity},
            [input1_id, input2_id],
            name=name
        )

    def audio_mix(self, input_ids: List[str], gains: Optional[List[float]] = None,
                  normalize: bool = True, name: Optional[str] = None) -> str:
        """Add an AUDIO_MIX node to mix multiple audio streams."""
        config = {"normalize": normalize}
        if gains is not None:
            config["gains"] = gains
        return self._add(NodeType.AUDIO_MIX, config, input_ids, name=name)

    # Output
    def set_output(self, node_id: str) -> "DAGBuilder":
        """Set the output node (returns self for chaining)."""
        self.dag.set_output(node_id)
        return self

    def build(self) -> DAG:
        """Build and validate the DAG.

        Raises:
            ValueError: if validation reports any errors.
        """
        errors = self.dag.validate()
        if errors:
            raise ValueError(f"Invalid DAG: {errors}")
        return self.dag

View File

@@ -0,0 +1,55 @@
"""
Cacheable effect system.
Effects are single Python files with:
- PEP 723 embedded dependencies
- @-tag metadata in docstrings
- Frame-by-frame or whole-video API
Effects are cached by content hash (SHA3-256) and executed in
sandboxed environments for determinism.
"""
from .meta import EffectMeta, ParamSpec, ExecutionContext
from .loader import load_effect, load_effect_file, LoadedEffect, compute_cid
from .binding import (
AnalysisData,
ResolvedBinding,
resolve_binding,
resolve_all_bindings,
bindings_to_lookup_table,
has_bindings,
extract_binding_sources,
)
from .sandbox import Sandbox, SandboxConfig, SandboxResult, is_bwrap_available, get_venv_path
from .runner import run_effect, run_effect_from_cache, EffectExecutor
# Explicit public API of the effects package (re-exports from the
# submodules imported above).
__all__ = [
    # Meta types
    "EffectMeta",
    "ParamSpec",
    "ExecutionContext",
    # Loader
    "load_effect",
    "load_effect_file",
    "LoadedEffect",
    "compute_cid",
    # Binding
    "AnalysisData",
    "ResolvedBinding",
    "resolve_binding",
    "resolve_all_bindings",
    "bindings_to_lookup_table",
    "has_bindings",
    "extract_binding_sources",
    # Sandbox
    "Sandbox",
    "SandboxConfig",
    "SandboxResult",
    "is_bwrap_available",
    "get_venv_path",
    # Runner
    "run_effect",
    "run_effect_from_cache",
    "EffectExecutor",
]

View File

@@ -0,0 +1,311 @@
"""
Parameter binding resolution.
Resolves bind expressions to per-frame lookup tables at plan time.
Binding options:
- :range [lo hi] - map 0-1 to output range
- :smooth N - smoothing window in seconds
- :offset N - time offset in seconds
- :on-event V - value on discrete events
- :decay N - exponential decay after event
- :noise N - add deterministic noise (seeded)
- :seed N - explicit RNG seed
"""
import hashlib
import math
import random
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
@dataclass
class AnalysisData:
    """
    Analysis data for binding resolution.

    Attributes:
        frame_rate: Video frame rate
        total_frames: Total number of frames
        features: Dict mapping feature name to per-frame values
        events: Dict mapping event name to list of frame indices
    """
    frame_rate: float
    total_frames: int
    features: Dict[str, List[float]]  # feature -> [value_per_frame]
    events: Dict[str, List[int]]      # event -> [frame_indices]

    def get_feature(self, name: str, frame: int) -> float:
        """Get the feature value at a frame, clamping past-the-end reads to the last sample."""
        values = self.features.get(name)
        if not values:
            # Unknown feature or empty track: neutral value.
            return 0.0
        if frame >= len(values):
            return values[-1]
        return values[frame]

    def get_events_in_range(
        self, name: str, start_frame: int, end_frame: int
    ) -> List[int]:
        """Return the event frames for `name` falling in [start_frame, end_frame)."""
        frames = self.events.get(name, [])
        return [f for f in frames if start_frame <= f < end_frame]
@dataclass
class ResolvedBinding:
    """
    Resolved binding with per-frame values.

    Attributes:
        param_name: Parameter this binding applies to
        values: List of values, one per frame
    """
    param_name: str
    values: List[float]

    def get(self, frame: int) -> float:
        """Return the value at `frame`, clamping past-the-end reads to the last value."""
        if frame < len(self.values):
            return self.values[frame]
        # Past the end: repeat the final value; 0.0 for an empty table.
        return self.values[-1] if self.values else 0.0
def resolve_binding(
    binding: Dict[str, Any],
    analysis: AnalysisData,
    param_name: str,
    cache_id: str = None,
) -> ResolvedBinding:
    """
    Resolve a binding specification to per-frame values.

    The base curve comes either from a discrete event track (impulse value
    with optional exponential decay) or a continuous feature track, and is
    then run through the optional post-processing steps in a fixed order:
    offset -> smooth -> range -> noise.

    Args:
        binding: Binding spec with source, feature, and options
        analysis: Analysis data with features and events
        param_name: Name of the parameter being bound
        cache_id: Cache ID for deterministic seeding

    Returns:
        ResolvedBinding with values for each frame

    Raises:
        ValueError: if the binding has no feature.
    """
    feature = binding.get("feature")
    if not feature:
        raise ValueError(f"Binding for {param_name} missing feature")

    total = analysis.total_frames
    if feature in analysis.events:
        # Discrete event binding.
        values = _resolve_event_binding(
            analysis.events.get(feature, []),
            total,
            analysis.frame_rate,
            binding.get("on_event", 1.0),
            binding.get("decay", 0.0),
        )
    else:
        # Continuous feature binding, padded to the full frame count by
        # repeating the last sample (all zeros when the feature is absent).
        base = analysis.features.get(feature, [])
        if base:
            values = list(base)
            if len(values) < total:
                values.extend([values[-1]] * (total - len(values)))
        else:
            values = [0.0] * total

    # Apply offset
    offset = binding.get("offset")
    if offset:
        values = _apply_offset(values, int(offset * analysis.frame_rate))

    # Apply smoothing
    smooth = binding.get("smooth")
    if smooth:
        values = _apply_smoothing(values, int(smooth * analysis.frame_rate))

    # Apply range mapping
    range_spec = binding.get("range")
    if range_spec:
        lo, hi = range_spec
        values = _apply_range(values, lo, hi)

    # Apply noise
    noise = binding.get("noise")
    if noise:
        seed = binding.get("seed")
        if seed is None and cache_id:
            # Derive seed from cache_id for determinism across runs.
            seed = int(hashlib.sha256(cache_id.encode()).hexdigest()[:8], 16)
        values = _apply_noise(values, noise, seed or 0)

    return ResolvedBinding(param_name=param_name, values=values)
def _resolve_event_binding(
    event_frames: List[int],
    total_frames: int,
    frame_rate: float,
    on_event: float,
    decay: float,
) -> List[float]:
    """
    Resolve event-based binding with optional decay.

    Args:
        event_frames: List of frame indices where events occur
        total_frames: Total number of frames
        frame_rate: Video frame rate
        on_event: Value at event
        decay: Decay time constant in seconds (0 = instant)

    Returns:
        List of values per frame
    """
    values = [0.0] * total_frames
    if not event_frames:
        return values
    if decay <= 0:
        # No decay - just mark event frames
        for f in event_frames:
            if 0 <= f < total_frames:
                values[f] = on_event
    else:
        # Apply exponential decay
        decay_frames = decay * frame_rate
        for f in event_frames:
            if f < 0 or f >= total_frames:
                continue
            # Apply decay from this event forward; overlapping tails keep
            # the maximum contribution.
            for i in range(f, total_frames):
                elapsed = i - f
                decayed = on_event * math.exp(-elapsed / decay_frames)
                if decayed < 0.001:
                    # Tail has become negligible - stop extending it.
                    break
                values[i] = max(values[i], decayed)
    return values
def _apply_offset(values: List[float], offset_frames: int) -> List[float]:
    """Shift values by offset frames (positive = delay); vacated slots become 0.0."""
    if not offset_frames:
        return values
    n = len(values)
    shifted = [0.0] * n
    for dst in range(n):
        src = dst - offset_frames
        if 0 <= src < n:
            shifted[dst] = values[src]
    return shifted
def _apply_smoothing(values: List[float], window_frames: int) -> List[float]:
    """Apply moving-average smoothing with a centered window (clamped at the edges)."""
    if window_frames <= 1:
        return values
    n = len(values)
    half = window_frames // 2
    smoothed = []
    for center in range(n):
        lo = max(0, center - half)
        hi = min(n, center + half + 1)
        window = values[lo:hi]
        smoothed.append(sum(window) / len(window))
    return smoothed
def _apply_range(values: List[float], lo: float, hi: float) -> List[float]:
    """Linearly map values from the 0-1 domain onto the [lo, hi] range."""
    span = hi - lo
    return [lo + v * span for v in values]
def _apply_noise(values: List[float], amount: float, seed: int) -> List[float]:
    """Add deterministic uniform noise in [-amount, amount], seeded for reproducibility."""
    rng = random.Random(seed)
    noisy = []
    for v in values:
        noisy.append(v + rng.uniform(-amount, amount))
    return noisy
def resolve_all_bindings(
    config: Dict[str, Any],
    analysis: AnalysisData,
    cache_id: str = None,
) -> Dict[str, ResolvedBinding]:
    """
    Resolve all bindings in a config dict.

    Looks for dict values carrying a truthy "_binding" marker.

    Args:
        config: Node config with potential bindings
        analysis: Analysis data
        cache_id: Cache ID for seeding

    Returns:
        Dict mapping param name to resolved binding
    """
    return {
        key: resolve_binding(value, analysis, key, cache_id)
        for key, value in config.items()
        if isinstance(value, dict) and value.get("_binding")
    }
def bindings_to_lookup_table(
    bindings: Dict[str, "ResolvedBinding"],
) -> Dict[str, List[float]]:
    """
    Convert resolved bindings to simple lookup tables.

    Returns dict mapping param name to list of per-frame values.
    This format is JSON-serializable for inclusion in execution plans.
    """
    table: Dict[str, List[float]] = {}
    for name, binding in bindings.items():
        table[name] = binding.values
    return table
def has_bindings(config: Dict[str, Any]) -> bool:
    """Return True when any config value is a dict carrying a truthy "_binding" marker."""
    return any(
        isinstance(value, dict) and value.get("_binding")
        for value in config.values()
    )
def extract_binding_sources(config: Dict[str, Any]) -> List[str]:
    """
    Extract all analysis source references from bindings.

    Returns list of node IDs that provide analysis data, deduplicated
    while preserving first-seen order.
    """
    seen: List[str] = []
    for value in config.values():
        if not (isinstance(value, dict) and value.get("_binding")):
            continue
        source = value.get("source")
        if source and source not in seen:
            seen.append(source)
    return seen

View File

@@ -0,0 +1,347 @@
"""
FFmpeg pipe-based frame processing.
Processes video through Python frame-by-frame effects using FFmpeg pipes:
FFmpeg decode -> Python process_frame -> FFmpeg encode
This avoids writing intermediate frames to disk.
"""
import logging
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple
import numpy as np
logger = logging.getLogger(__name__)
@dataclass
class VideoInfo:
    """Video metadata, as reported by ffprobe (see probe_video)."""
    width: int  # frame width in pixels
    height: int  # frame height in pixels
    frame_rate: float  # frames per second
    total_frames: int  # frame count (may be estimated from duration; 0 if unknown)
    duration: float  # duration in seconds (0.0 if unknown)
    pixel_format: str = "rgb24"  # pixel format used for raw frame I/O
def probe_video(path: Path) -> VideoInfo:
    """
    Get video information using ffprobe.

    Args:
        path: Path to video file

    Returns:
        VideoInfo with dimensions, frame rate, etc.

    Raises:
        RuntimeError: if ffprobe fails or its output cannot be parsed.
    """
    cmd = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=width,height,r_frame_rate,nb_frames,duration",
        "-of", "csv=p=0",
        str(path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed: {result.stderr}")
    parts = result.stdout.strip().split(",")
    if len(parts) < 4:
        raise RuntimeError(f"Unexpected ffprobe output: {result.stdout}")
    width = int(parts[0])
    height = int(parts[1])
    # Parse frame rate (could be "30/1" or "30")
    fr_parts = parts[2].split("/")
    if len(fr_parts) == 2:
        frame_rate = float(fr_parts[0]) / float(fr_parts[1])
    else:
        frame_rate = float(fr_parts[0])
    # nb_frames might be N/A
    # NOTE(review): parts[3]/parts[4] assume ffprobe emits fields in the
    # requested order (nb_frames before duration); ffprobe actually orders
    # CSV fields by their position in the stream section, so these two may
    # be swapped on some builds - verify against the ffprobe version in use.
    total_frames = 0
    duration = 0.0
    try:
        total_frames = int(parts[3])
    except (ValueError, IndexError):
        pass
    try:
        duration = float(parts[4]) if len(parts) > 4 else 0.0
    except (ValueError, IndexError):
        pass
    # Fall back to an estimate when the container does not expose a frame count.
    if total_frames == 0 and duration > 0:
        total_frames = int(duration * frame_rate)
    return VideoInfo(
        width=width,
        height=height,
        frame_rate=frame_rate,
        total_frames=total_frames,
        duration=duration,
    )
# Signature of a frame-by-frame effect: (frame, params, state) -> (frame, state).
# `frame` is an HxWxC uint8 ndarray (see process_video's reshape); `state` is
# opaque carry-over threaded between consecutive frames.
FrameProcessor = Callable[[np.ndarray, Dict[str, Any], Any], Tuple[np.ndarray, Any]]
def process_video(
    input_path: Path,
    output_path: Path,
    process_frame: FrameProcessor,
    params: Dict[str, Any],
    bindings: Dict[str, List[float]] = None,
    initial_state: Any = None,
    pixel_format: str = "rgb24",
    output_codec: str = "libx264",
    output_options: List[str] = None,
) -> Tuple[Path, Any]:
    """
    Process video through frame-by-frame effect.

    Decodes raw frames from one FFmpeg process, runs `process_frame` on each,
    and pipes the result into a second FFmpeg process that re-encodes and
    muxes the original audio back in - no intermediate frames touch disk.

    Args:
        input_path: Input video path
        output_path: Output video path
        process_frame: Function (frame, params, state) -> (frame, state)
        params: Static parameter dict
        bindings: Per-frame parameter lookup tables (param name -> values;
            the value at the current frame index overrides the static
            param of the same name)
        initial_state: Initial state for process_frame
        pixel_format: Pixel format for frame data ("rgb24" or "rgba")
        output_codec: Video codec for output
        output_options: Additional ffmpeg output options

    Returns:
        Tuple of (output_path, final_state)

    Raises:
        RuntimeError: if the encoder process exits non-zero.
        ValueError: if process_frame changes the frame shape.
    """
    bindings = bindings or {}
    output_options = output_options or []
    # Probe input
    info = probe_video(input_path)
    logger.info(f"Processing {info.width}x{info.height} @ {info.frame_rate}fps")
    # Calculate bytes per frame
    if pixel_format == "rgb24":
        bytes_per_pixel = 3
    elif pixel_format == "rgba":
        bytes_per_pixel = 4
    else:
        bytes_per_pixel = 3  # Default to RGB
    frame_size = info.width * info.height * bytes_per_pixel
    # Start decoder process
    decode_cmd = [
        "ffmpeg",
        "-i", str(input_path),
        "-f", "rawvideo",
        "-pix_fmt", pixel_format,
        "-",
    ]
    # Start encoder process
    encode_cmd = [
        "ffmpeg",
        "-y",
        "-f", "rawvideo",
        "-pix_fmt", pixel_format,
        "-s", f"{info.width}x{info.height}",
        "-r", str(info.frame_rate),
        "-i", "-",
        "-i", str(input_path),  # For audio
        "-map", "0:v",
        "-map", "1:a?",  # '?' makes the audio stream optional
        "-c:v", output_codec,
        "-c:a", "aac",
        *output_options,
        str(output_path),
    ]
    logger.debug(f"Decoder: {' '.join(decode_cmd)}")
    logger.debug(f"Encoder: {' '.join(encode_cmd)}")
    decoder = subprocess.Popen(
        decode_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    # NOTE(review): encoder stderr is a PIPE that is never drained while
    # frames are streaming; a very chatty ffmpeg could fill the pipe buffer
    # and deadlock the write loop - confirm with long/verbose encodes.
    encoder = subprocess.Popen(
        encode_cmd,
        stdin=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    state = initial_state
    frame_idx = 0
    try:
        while True:
            # Read frame from decoder
            raw_frame = decoder.stdout.read(frame_size)
            if len(raw_frame) < frame_size:
                break  # short read: decoder finished (or input truncated)
            # Convert to numpy
            frame = np.frombuffer(raw_frame, dtype=np.uint8)
            frame = frame.reshape((info.height, info.width, bytes_per_pixel))
            # Build per-frame params: bound values override static ones.
            frame_params = dict(params)
            for param_name, values in bindings.items():
                if frame_idx < len(values):
                    frame_params[param_name] = values[frame_idx]
            # Process frame
            processed, state = process_frame(frame, frame_params, state)
            # Ensure correct shape and dtype
            if processed.shape != frame.shape:
                raise ValueError(
                    f"Frame shape mismatch: {processed.shape} vs {frame.shape}"
                )
            processed = processed.astype(np.uint8)
            # Write to encoder
            encoder.stdin.write(processed.tobytes())
            frame_idx += 1
            if frame_idx % 100 == 0:
                logger.debug(f"Processed frame {frame_idx}")
    except Exception as e:
        logger.error(f"Frame processing failed at frame {frame_idx}: {e}")
        raise
    finally:
        # Close the pipes so both ffmpeg processes can flush and exit.
        decoder.stdout.close()
        decoder.wait()
        encoder.stdin.close()
        encoder.wait()
    if encoder.returncode != 0:
        stderr = encoder.stderr.read().decode() if encoder.stderr else ""
        raise RuntimeError(f"Encoder failed: {stderr}")
    logger.info(f"Processed {frame_idx} frames")
    return output_path, state
def process_video_batch(
    input_path: Path,
    output_path: Path,
    process_frames: Callable[[List[np.ndarray], Dict[str, Any]], List[np.ndarray]],
    params: Dict[str, Any],
    batch_size: int = 30,
    pixel_format: str = "rgb24",
    output_codec: str = "libx264",
) -> Path:
    """
    Process video in batches for effects that need temporal context.

    Same decode-pipe-encode structure as process_video, but frames are
    buffered and handed to the effect `batch_size` at a time (plus one
    final partial batch).

    Args:
        input_path: Input video path
        output_path: Output video path
        process_frames: Function (frames_batch, params) -> processed_batch
        params: Parameter dict
        batch_size: Number of frames per batch
        pixel_format: Pixel format ("rgb24" or "rgba")
        output_codec: Output codec

    Returns:
        Output path

    Raises:
        RuntimeError: if the encoder process exits non-zero.
    """
    info = probe_video(input_path)
    if pixel_format == "rgb24":
        bytes_per_pixel = 3
    elif pixel_format == "rgba":
        bytes_per_pixel = 4
    else:
        bytes_per_pixel = 3
    frame_size = info.width * info.height * bytes_per_pixel
    decode_cmd = [
        "ffmpeg",
        "-i", str(input_path),
        "-f", "rawvideo",
        "-pix_fmt", pixel_format,
        "-",
    ]
    encode_cmd = [
        "ffmpeg",
        "-y",
        "-f", "rawvideo",
        "-pix_fmt", pixel_format,
        "-s", f"{info.width}x{info.height}",
        "-r", str(info.frame_rate),
        "-i", "-",
        "-i", str(input_path),  # second input carries the original audio
        "-map", "0:v",
        "-map", "1:a?",
        "-c:v", output_codec,
        "-c:a", "aac",
        str(output_path),
    ]
    decoder = subprocess.Popen(
        decode_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    # NOTE(review): encoder stderr is a PIPE that is never drained during
    # streaming - same potential deadlock as in process_video; confirm.
    encoder = subprocess.Popen(
        encode_cmd,
        stdin=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    batch = []
    total_processed = 0
    try:
        while True:
            raw_frame = decoder.stdout.read(frame_size)
            if len(raw_frame) < frame_size:
                # Process remaining batch
                if batch:
                    processed = process_frames(batch, params)
                    for frame in processed:
                        encoder.stdin.write(frame.astype(np.uint8).tobytes())
                        total_processed += 1
                break
            frame = np.frombuffer(raw_frame, dtype=np.uint8)
            frame = frame.reshape((info.height, info.width, bytes_per_pixel))
            batch.append(frame)
            if len(batch) >= batch_size:
                processed = process_frames(batch, params)
                for frame in processed:
                    encoder.stdin.write(frame.astype(np.uint8).tobytes())
                    total_processed += 1
                batch = []
    finally:
        # Close the pipes so both ffmpeg processes can flush and exit.
        decoder.stdout.close()
        decoder.wait()
        encoder.stdin.close()
        encoder.wait()
    if encoder.returncode != 0:
        stderr = encoder.stderr.read().decode() if encoder.stderr else ""
        raise RuntimeError(f"Encoder failed: {stderr}")
    logger.info(f"Processed {total_processed} frames in batches of {batch_size}")
    return output_path

View File

@@ -0,0 +1,455 @@
"""
Effect file loader.
Parses effect files with:
- PEP 723 inline script metadata for dependencies
- @-tag docstrings for effect metadata
- META object for programmatic access
"""
import ast
import hashlib
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from .meta import EffectMeta, ParamSpec
@dataclass
class LoadedEffect:
    """
    A loaded effect with all metadata.

    Attributes:
        source: Original source code
        cid: SHA3-256 hash of source (content id; see compute_cid)
        meta: Extracted EffectMeta
        dependencies: List of pip dependencies (from PEP 723 block)
        requires_python: Python version requirement
        module: Compiled module (if loaded)
    """
    source: str
    cid: str
    meta: EffectMeta
    dependencies: List[str] = field(default_factory=list)
    requires_python: str = ">=3.10"
    module: Any = None

    def has_frame_api(self) -> bool:
        """Check if effect has frame-by-frame API (meta.api_type == "frame")."""
        return self.meta.api_type == "frame"

    def has_video_api(self) -> bool:
        """Check if effect has whole-video API (meta.api_type == "video")."""
        return self.meta.api_type == "video"
def compute_cid(source: str) -> str:
    """Compute the SHA3-256 content id (hex digest) of an effect's source text."""
    digest = hashlib.sha3_256()
    digest.update(source.encode("utf-8"))
    return digest.hexdigest()
def parse_pep723_metadata(source: str) -> Tuple[List[str], str]:
    """
    Parse PEP 723 inline script metadata.

    Looks for:
        # /// script
        # requires-python = ">=3.10"
        # dependencies = ["numpy", "opencv-python"]
        # ///

    Returns:
        Tuple of (dependencies list, requires_python string); defaults to
        ([], ">=3.10") when no script block is present.
    """
    deps: List[str] = []
    python_req = ">=3.10"
    # Locate the script block
    block_match = re.search(r"# /// script\n(.*?)# ///", source, re.DOTALL)
    if block_match is None:
        return deps, python_req
    body = block_match.group(1)
    # Parse dependencies: collect the quoted strings inside the list literal
    deps_match = re.search(r'# dependencies = \[(.*?)\]', body, re.DOTALL)
    if deps_match:
        deps = re.findall(r'"([^"]+)"', deps_match.group(1))
    # Parse requires-python
    python_match = re.search(r'# requires-python = "([^"]+)"', body)
    if python_match:
        python_req = python_match.group(1)
    return deps, python_req
def parse_docstring_metadata(docstring: str) -> Dict[str, Any]:
    """
    Parse @-tag metadata from an effect's module docstring.

    Supports:
        @effect name
        @version 1.0.0
        @author @user@domain
        @temporal false
        @description
            Multi-line description text.
        @param name type
            @range lo hi
            @default value
            Description text.
        @example
            (fx effect :param value)

    Args:
        docstring: Module docstring text (may be empty).

    Returns:
        Dictionary with keys name, version, author, temporal, description,
        params (list of dicts) and examples (list of str); empty dict when
        *docstring* is falsy.
    """
    if not docstring:
        return {}

    result: Dict[str, Any] = {
        "name": "",
        "version": "1.0.0",
        "author": "",
        "temporal": False,
        "description": "",
        "params": [],
        "examples": [],
    }

    lines = docstring.strip().split("\n")
    i = 0
    while i < len(lines):
        line = lines[i].strip()

        if line.startswith("@effect "):
            result["name"] = line[8:].strip()
        elif line.startswith("@version "):
            result["version"] = line[9:].strip()
        elif line.startswith("@author "):
            result["author"] = line[8:].strip()
        elif line.startswith("@temporal "):
            val = line[10:].strip().lower()
            result["temporal"] = val in ("true", "yes", "1")
        elif line.startswith("@description"):
            # Collect every following line (unstripped, to preserve layout)
            # until the next @-tag.
            desc_lines = []
            i += 1
            while i < len(lines):
                if lines[i].strip().startswith("@"):
                    i -= 1  # Back up so the outer loop processes this tag.
                    break
                desc_lines.append(lines[i])
                i += 1
            result["description"] = "\n".join(desc_lines).strip()
        elif line.startswith("@param "):
            # Parse parameter header: @param name type
            parts = line[7:].split()
            if len(parts) >= 2:
                param = {
                    "name": parts[0],
                    "type": parts[1],
                    "range": None,
                    "default": None,
                    "description": "",
                }
                # Collect the parameter's detail lines (@range, @default,
                # free-text description) until the next unrelated tag.
                desc_lines = []
                i += 1
                while i < len(lines):
                    stripped = lines[i].strip()
                    if stripped.startswith("@range "):
                        range_parts = stripped[7:].split()
                        if len(range_parts) >= 2:
                            try:
                                param["range"] = (
                                    float(range_parts[0]),
                                    float(range_parts[1]),
                                )
                            except ValueError:
                                pass  # Malformed range: ignore, keep None.
                    elif stripped.startswith("@default "):
                        param["default"] = stripped[9:].strip()
                    elif stripped.startswith("@"):
                        # Any other tag ends this param block; back up so the
                        # outer loop processes it.
                        i -= 1
                        break
                    elif stripped:
                        desc_lines.append(stripped)
                    i += 1
                param["description"] = " ".join(desc_lines)
                result["params"].append(param)
        elif line.startswith("@example"):
            # Collect example lines until the next @-tag.  Bug fix: the old
            # code broke out WITHOUT backing up for non-@example tags, so the
            # outer loop's i += 1 silently skipped the tag that followed an
            # example block.
            example_lines = []
            i += 1
            while i < len(lines):
                if lines[i].strip().startswith("@"):
                    i -= 1  # Back up so the tag is processed, not skipped.
                    break
                example_lines.append(lines[i])
                i += 1
            example = "\n".join(example_lines).strip()
            if example:
                result["examples"].append(example)

        i += 1

    return result
def extract_meta_from_ast(source: str) -> Optional[Dict[str, Any]]:
    """
    Extract the META object from the source's AST.

    Looks for an assignment of the form ``META = EffectMeta(...)`` and
    returns the call's keyword arguments; None when the source does not
    parse or no such call-assignment exists.
    """
    try:
        tree = ast.parse(source)
    except SyntaxError:
        return None

    for node in ast.walk(tree):
        if not isinstance(node, ast.Assign):
            continue
        targets_meta = any(
            isinstance(target, ast.Name) and target.id == "META"
            for target in node.targets
        )
        # Only a call expression (e.g. EffectMeta(...)) yields kwargs.
        if targets_meta and isinstance(node.value, ast.Call):
            return _extract_call_kwargs(node.value)

    return None
def _extract_call_kwargs(call: ast.Call) -> Dict[str, Any]:
"""Extract keyword arguments from an AST Call node."""
result = {}
for keyword in call.keywords:
if keyword.arg is None:
continue
value = _ast_to_value(keyword.value)
if value is not None:
result[keyword.arg] = value
return result
def _ast_to_value(node: ast.expr) -> Any:
"""Convert AST node to Python value."""
if isinstance(node, ast.Constant):
return node.value
elif isinstance(node, ast.Str): # Python 3.7 compat
return node.s
elif isinstance(node, ast.Num): # Python 3.7 compat
return node.n
elif isinstance(node, ast.NameConstant): # Python 3.7 compat
return node.value
elif isinstance(node, ast.List):
return [_ast_to_value(elt) for elt in node.elts]
elif isinstance(node, ast.Tuple):
return tuple(_ast_to_value(elt) for elt in node.elts)
elif isinstance(node, ast.Dict):
return {
_ast_to_value(k): _ast_to_value(v)
for k, v in zip(node.keys, node.values)
if k is not None
}
elif isinstance(node, ast.Call):
# Handle ParamSpec(...) calls
if isinstance(node.func, ast.Name) and node.func.id == "ParamSpec":
return _extract_call_kwargs(node)
return None
def get_module_docstring(source: str) -> str:
    """Extract the module-level docstring from source ("" if absent).

    Bug fix: the previous implementation referenced ast.Str, which was
    removed in Python 3.12 (AttributeError for any non-Constant leading
    expression), and could return a non-str when the first statement was a
    non-string constant.  ast.get_docstring handles the leading-Expr dance;
    clean=False preserves the raw text like the original did.
    """
    try:
        tree = ast.parse(source)
    except SyntaxError:
        return ""
    doc = ast.get_docstring(tree, clean=False)
    return doc if isinstance(doc, str) else ""
def load_effect(source: str) -> LoadedEffect:
    """
    Load an effect from source code.

    Parses:
    1. PEP 723 metadata for dependencies
    2. Module docstring for @-tag metadata
    3. META object for programmatic metadata

    Priority: META object > docstring > defaults

    Args:
        source: Effect source code

    Returns:
        LoadedEffect with all metadata

    Raises:
        ValueError: If effect is invalid (currently: no name found in
            either the META object or an @effect docstring tag)
    """
    cid = compute_cid(source)
    # Parse PEP 723 metadata
    dependencies, requires_python = parse_pep723_metadata(source)
    # Parse docstring metadata
    docstring = get_module_docstring(source)
    doc_meta = parse_docstring_metadata(docstring)
    # Try to extract META from AST
    ast_meta = extract_meta_from_ast(source)
    # Build EffectMeta, preferring META object over docstring
    name = ""
    if ast_meta and "name" in ast_meta:
        name = ast_meta["name"]
    elif doc_meta.get("name"):
        name = doc_meta["name"]
    if not name:
        raise ValueError("Effect must have a name (@effect or META.name)")
    # NOTE(review): when a META object exists but omits one of these keys,
    # the docstring value is NOT used as a fallback -- .get() yields None and
    # the isinstance guards further down reset it to the default.  Confirm
    # this is the intended priority semantics.
    version = ast_meta.get("version") if ast_meta else doc_meta.get("version", "1.0.0")
    temporal = ast_meta.get("temporal") if ast_meta else doc_meta.get("temporal", False)
    author = ast_meta.get("author") if ast_meta else doc_meta.get("author", "")
    description = ast_meta.get("description") if ast_meta else doc_meta.get("description", "")
    examples = ast_meta.get("examples") if ast_meta else doc_meta.get("examples", [])
    # Build params
    params: List[ParamSpec] = []
    if ast_meta and "params" in ast_meta:
        # Params extracted from META = EffectMeta(params=[ParamSpec(...)]).
        for p in ast_meta["params"]:
            if isinstance(p, dict):
                # param_type usually survives AST extraction only as a string
                # key (type names are Name nodes, dropped by _ast_to_value).
                type_map = {"float": float, "int": int, "bool": bool, "str": str}
                param_type = type_map.get(p.get("param_type", "float"), float)
                if isinstance(p.get("param_type"), type):
                    param_type = p["param_type"]
                params.append(
                    ParamSpec(
                        name=p.get("name", ""),
                        param_type=param_type,
                        default=p.get("default"),
                        range=p.get("range"),
                        description=p.get("description", ""),
                    )
                )
    elif doc_meta.get("params"):
        # Params parsed from @param docstring tags; type and default arrive
        # as strings, so coerce the default where possible.
        for p in doc_meta["params"]:
            type_map = {"float": float, "int": int, "bool": bool, "str": str}
            param_type = type_map.get(p.get("type", "float"), float)
            default = p.get("default")
            if default is not None:
                try:
                    default = param_type(default)
                except (ValueError, TypeError):
                    pass  # Keep the raw string default if coercion fails.
            params.append(
                ParamSpec(
                    name=p["name"],
                    param_type=param_type,
                    default=default,
                    range=p.get("range"),
                    description=p.get("description", ""),
                )
            )
    # Determine API type by checking for function definitions
    api_type = "frame"  # default
    try:
        tree = ast.parse(source)
        for node in ast.walk(tree):
            if isinstance(node, ast.FunctionDef):
                if node.name == "process":
                    api_type = "video"
                    break
                elif node.name == "process_frame":
                    api_type = "frame"
                    break
    except SyntaxError:
        pass  # Unparseable source: fall back to the "frame" default.
    # The isinstance guards normalize any None/odd values left by the
    # META-vs-docstring selection above.
    meta = EffectMeta(
        name=name,
        version=version if isinstance(version, str) else "1.0.0",
        temporal=bool(temporal),
        params=params,
        author=author if isinstance(author, str) else "",
        description=description if isinstance(description, str) else "",
        examples=examples if isinstance(examples, list) else [],
        dependencies=dependencies,
        requires_python=requires_python,
        api_type=api_type,
    )
    return LoadedEffect(
        source=source,
        cid=cid,
        meta=meta,
        dependencies=dependencies,
        requires_python=requires_python,
    )
def load_effect_file(path: Path) -> LoadedEffect:
    """Read *path* as UTF-8 text and load it as an effect."""
    return load_effect(path.read_text(encoding="utf-8"))
def compute_deps_hash(dependencies: List[str]) -> str:
    """
    Hash a dependency list, insensitive to order, case and edge whitespace.

    Used for venv caching - same deps = same hash = reuse venv.
    """
    normalized = sorted(dep.lower().strip() for dep in dependencies)
    return hashlib.sha3_256("\n".join(normalized).encode("utf-8")).hexdigest()

247
core/artdag/effects/meta.py Normal file
View File

@@ -0,0 +1,247 @@
"""
Effect metadata types.
Defines the core dataclasses for effect metadata:
- ParamSpec: Parameter specification with type, range, and default
- EffectMeta: Complete effect metadata including params and flags
"""
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple, Type, Union
@dataclass
class ParamSpec:
    """
    Specification for a single effect parameter.

    Attributes:
        name: Parameter name (used in recipes as :name)
        param_type: Python type (float, int, bool, str)
        default: Default value if not specified
        range: Optional (min, max) tuple for numeric types
        description: Human-readable description
        choices: Optional list of allowed values (for enums)
    """

    name: str
    param_type: Type
    default: Any = None
    range: Optional[Tuple[float, float]] = None
    description: str = ""
    choices: Optional[List[Any]] = None

    def _coerce(self, value: Any) -> Any:
        """Coerce *value* to this parameter's declared type."""
        try:
            if self.param_type == bool:
                # Strings get a truthiness parse; everything else uses bool().
                if isinstance(value, str):
                    return value.lower() in ("true", "1", "yes")
                return bool(value)
            return self.param_type(value)
        except (ValueError, TypeError) as e:
            raise ValueError(
                f"Parameter '{self.name}' expects {self.param_type.__name__}, "
                f"got {type(value).__name__}: {e}"
            )

    def validate(self, value: Any) -> Any:
        """
        Validate and coerce a parameter value.

        Args:
            value: Input value to validate (None means "use the default")

        Returns:
            Validated and coerced value

        Raises:
            ValueError: If value is invalid, out of range, or not a choice
        """
        if value is None:
            if self.default is None:
                raise ValueError(f"Parameter '{self.name}' requires a value")
            return self.default

        value = self._coerce(value)

        # Range check applies only to numeric parameter types.
        if self.range is not None and self.param_type in (int, float):
            min_val, max_val = self.range
            if value < min_val or value > max_val:
                raise ValueError(
                    f"Parameter '{self.name}' must be in range "
                    f"[{min_val}, {max_val}], got {value}"
                )

        # Enum-style restriction, when configured.
        if self.choices is not None and value not in self.choices:
            raise ValueError(
                f"Parameter '{self.name}' must be one of {self.choices}, got {value}"
            )

        return value

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a plain dictionary for serialization."""
        d: Dict[str, Any] = {
            "name": self.name,
            "type": self.param_type.__name__,
            "description": self.description,
        }
        # Optional fields are included only when set.
        if self.default is not None:
            d["default"] = self.default
        if self.range is not None:
            d["range"] = list(self.range)
        if self.choices is not None:
            d["choices"] = self.choices
        return d
@dataclass
class EffectMeta:
    """
    Complete metadata for an effect.

    Attributes:
        name: Effect name (used in recipes)
        version: Semantic version string
        temporal: If True, effect needs complete input (can't be collapsed)
        params: List of parameter specifications
        author: Optional author identifier
        description: Human-readable description
        examples: List of example S-expression usages
        dependencies: List of Python package dependencies
        requires_python: Minimum Python version
        api_type: "frame" for frame-by-frame, "video" for whole-video
    """

    name: str
    version: str = "1.0.0"
    temporal: bool = False
    params: List[ParamSpec] = field(default_factory=list)
    author: str = ""
    description: str = ""
    examples: List[str] = field(default_factory=list)
    dependencies: List[str] = field(default_factory=list)
    requires_python: str = ">=3.10"
    api_type: str = "frame"  # "frame" or "video"

    def get_param(self, name: str) -> Optional[ParamSpec]:
        """Return the parameter spec named *name*, or None when absent."""
        return next((spec for spec in self.params if spec.name == name), None)

    def validate_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate all parameters against their specs.

        Args:
            params: Dictionary of raw parameter values

        Returns:
            Dictionary of validated/coerced values, defaults filled in

        Raises:
            ValueError: If any parameter is invalid
        """
        return {
            spec.name: spec.validate(params.get(spec.name))
            for spec in self.params
        }

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dictionary."""
        return {
            "name": self.name,
            "version": self.version,
            "temporal": self.temporal,
            "params": [spec.to_dict() for spec in self.params],
            "author": self.author,
            "description": self.description,
            "examples": self.examples,
            "dependencies": self.dependencies,
            "requires_python": self.requires_python,
            "api_type": self.api_type,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "EffectMeta":
        """Reconstruct an EffectMeta from :meth:`to_dict` output."""
        # Map serialized type names back to the actual Python types.
        type_map = {"float": float, "int": int, "bool": bool, "str": str}
        specs = [
            ParamSpec(
                name=entry["name"],
                param_type=type_map.get(entry.get("type", "float"), float),
                default=entry.get("default"),
                range=tuple(entry["range"]) if entry.get("range") else None,
                description=entry.get("description", ""),
                choices=entry.get("choices"),
            )
            for entry in data.get("params", [])
        ]
        return cls(
            name=data["name"],
            version=data.get("version", "1.0.0"),
            temporal=data.get("temporal", False),
            params=specs,
            author=data.get("author", ""),
            description=data.get("description", ""),
            examples=data.get("examples", []),
            dependencies=data.get("dependencies", []),
            requires_python=data.get("requires_python", ">=3.10"),
            api_type=data.get("api_type", "frame"),
        )
@dataclass
class ExecutionContext:
    """
    Context passed to effect execution.

    Provides controlled access to resources within the sandbox: input and
    output paths, static parameters, a deterministic seed, and optional
    per-frame parameter bindings.
    """

    input_paths: List[str]
    output_path: str
    params: Dict[str, Any]
    seed: int  # Deterministic seed for RNG
    frame_rate: float = 30.0
    width: int = 1920
    height: int = 1080
    # Resolved bindings (frame -> param value lookup)
    bindings: Dict[str, List[float]] = field(default_factory=dict)

    def get_param_at_frame(self, param_name: str, frame: int) -> Any:
        """
        Get a parameter's value at a specific frame.

        Bound parameters are looked up in their per-frame series (holding
        the last value past the end); unbound parameters fall back to the
        static value in ``params``.
        """
        if param_name not in self.bindings:
            return self.params.get(param_name)
        series = self.bindings[param_name]
        if not series:
            # Empty binding series: fall back to the static parameter.
            return self.params.get(param_name)
        index = frame if frame < len(series) else len(series) - 1
        return series[index]

    def get_rng(self) -> "random.Random":
        """Return a random generator seeded with this context's seed."""
        import random

        return random.Random(self.seed)

View File

@@ -0,0 +1,259 @@
"""
Effect runner.
Main entry point for executing cached effects with sandboxing.
"""
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional
from .binding import AnalysisData, bindings_to_lookup_table, resolve_all_bindings
from .loader import load_effect, LoadedEffect
from .meta import ExecutionContext
from .sandbox import Sandbox, SandboxConfig, SandboxResult, get_venv_path
logger = logging.getLogger(__name__)
def run_effect(
    effect_source: str,
    input_paths: List[Path],
    output_path: Path,
    params: Dict[str, Any],
    analysis: Optional[AnalysisData] = None,
    cache_id: Optional[str] = None,
    seed: int = 0,
    trust_level: str = "untrusted",
    timeout: int = 3600,
) -> SandboxResult:
    """
    Run an effect with full sandboxing.

    This is the main entry point for effect execution.

    Args:
        effect_source: Effect source code
        input_paths: List of input file paths
        output_path: Output file path
        params: Effect parameters (may contain binding dicts)
        analysis: Optional analysis data for binding resolution
        cache_id: Cache ID, forwarded to binding resolution
        seed: RNG seed passed into the sandbox
        trust_level: "untrusted" or "trusted"
        timeout: Maximum execution time in seconds

    Returns:
        SandboxResult with success status and output

    Raises:
        ValueError: If the effect source or its parameters are invalid
            (propagated from load_effect / validate_params).
    """
    # Load and validate effect
    loaded = load_effect(effect_source)
    logger.info(f"Running effect '{loaded.meta.name}' v{loaded.meta.version}")
    # Resolve bindings if analysis data available
    bindings = {}
    if analysis:
        resolved = resolve_all_bindings(params, analysis, cache_id)
        bindings = bindings_to_lookup_table(resolved)
        # Remove binding dicts from params, keeping only resolved values
        # (a binding dict is recognized by its truthy "_binding" key).
        params = {
            k: v for k, v in params.items()
            if not (isinstance(v, dict) and v.get("_binding"))
        }
    # Validate parameters
    validated_params = loaded.meta.validate_params(params)
    # Get or create venv for dependencies
    venv_path = None
    if loaded.dependencies:
        venv_path = get_venv_path(loaded.dependencies)
    # Configure sandbox
    config = SandboxConfig(
        trust_level=trust_level,
        venv_path=venv_path,
        timeout=timeout,
    )
    # Write effect to a temp file so the sandbox can copy/mount it.
    import tempfile
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".py",
        delete=False,
    ) as f:
        f.write(effect_source)
        effect_path = Path(f.name)
    try:
        with Sandbox(config) as sandbox:
            result = sandbox.run_effect(
                effect_path=effect_path,
                input_paths=input_paths,
                output_path=output_path,
                params=validated_params,
                bindings=bindings,
                seed=seed,
            )
    finally:
        # Always remove the temp effect file, even when execution fails.
        effect_path.unlink(missing_ok=True)
    return result
def run_effect_from_cache(
    cache,
    effect_hash: str,
    input_paths: List[Path],
    output_path: Path,
    params: Dict[str, Any],
    analysis: Optional[AnalysisData] = None,
    cache_id: Optional[str] = None,
    seed: int = 0,
    trust_level: str = "untrusted",
    timeout: int = 3600,
) -> SandboxResult:
    """
    Run an effect from cache by content hash.

    Looks up the effect source via ``cache.get_effect`` and delegates to
    :func:`run_effect`; a missing effect yields a failed SandboxResult
    rather than raising.

    Args:
        cache: Cache instance (must provide get_effect(hash) -> source)
        effect_hash: Content hash of effect
        input_paths: Input file paths
        output_path: Output file path
        params: Effect parameters
        analysis: Optional analysis data
        cache_id: Cache ID for seeding/binding resolution
        seed: RNG seed
        trust_level: "untrusted" or "trusted"
        timeout: Max execution time

    Returns:
        SandboxResult
    """
    effect_source = cache.get_effect(effect_hash)
    if not effect_source:
        # Report the miss as a failure result; truncate the hash for the log.
        return SandboxResult(
            success=False,
            error=f"Effect not found in cache: {effect_hash[:16]}...",
        )
    return run_effect(
        effect_source=effect_source,
        input_paths=input_paths,
        output_path=output_path,
        params=params,
        analysis=analysis,
        cache_id=cache_id,
        seed=seed,
        trust_level=trust_level,
        timeout=timeout,
    )
def check_effect_temporal(cache, effect_hash: str) -> bool:
    """
    Check if an effect is temporal (can't be collapsed).

    Args:
        cache: Cache instance
        effect_hash: Content hash of effect

    Returns:
        True if effect is temporal; False when unknown or not in cache.
    """
    metadata = cache.get_effect_metadata(effect_hash)
    if metadata:
        return metadata.get("meta", {}).get("temporal", False)
    return False
def get_effect_api_type(cache, effect_hash: str) -> str:
    """
    Get the API type of a cached effect.

    Args:
        cache: Cache instance
        effect_hash: Content hash of effect

    Returns:
        "frame" or "video"; "frame" when unknown or not in cache.
    """
    metadata = cache.get_effect_metadata(effect_hash)
    if metadata:
        return metadata.get("meta", {}).get("api_type", "frame")
    return "frame"
class EffectExecutor:
    """
    High-level executor for cached effects.

    Wraps the module-level helpers with a cache handle and a default
    trust level, so callers don't have to thread those through every call.
    """

    def __init__(self, cache, trust_level: str = "untrusted"):
        """
        Initialize executor.

        Args:
            cache: Cache instance
            trust_level: Default trust level
        """
        self.cache = cache
        self.trust_level = trust_level

    def execute(
        self,
        effect_hash: str,
        input_paths: List[Path],
        output_path: Path,
        params: Dict[str, Any],
        analysis: Optional[AnalysisData] = None,
        step_cache_id: str = None,
    ) -> SandboxResult:
        """
        Execute an effect by content hash.

        Args:
            effect_hash: Content hash of effect
            input_paths: Input file paths
            output_path: Output path
            params: Effect parameters
            analysis: Analysis data for bindings
            step_cache_id: Step cache ID for seeding

        Returns:
            SandboxResult
        """
        # An effect whose metadata was marked "trusted" (by the L1 owner)
        # may escalate above this executor's default trust level.
        effective_trust = self.trust_level
        metadata = self.cache.get_effect_metadata(effect_hash)
        if metadata and metadata.get("trust_level") == "trusted":
            effective_trust = "trusted"
        return run_effect_from_cache(
            cache=self.cache,
            effect_hash=effect_hash,
            input_paths=input_paths,
            output_path=output_path,
            params=params,
            analysis=analysis,
            cache_id=step_cache_id,
            trust_level=effective_trust,
        )

    def is_temporal(self, effect_hash: str) -> bool:
        """Check if the effect is temporal."""
        return check_effect_temporal(self.cache, effect_hash)

    def get_api_type(self, effect_hash: str) -> str:
        """Get the effect's API type ("frame" or "video")."""
        return get_effect_api_type(self.cache, effect_hash)

View File

@@ -0,0 +1,431 @@
"""
Sandbox for effect execution.
Uses bubblewrap (bwrap) for Linux namespace isolation.
Provides controlled access to:
- Input files (read-only)
- Output file (write)
- stderr (logging)
- Seeded RNG
"""
import hashlib
import json
import logging
import os
import shutil
import subprocess
import tempfile
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class SandboxConfig:
    """
    Sandbox configuration.

    Attributes:
        trust_level: "untrusted" (full isolation) or "trusted" (allows subprocess)
        venv_path: Path to effect's virtual environment
        wheel_cache: Shared wheel cache directory
        timeout: Maximum execution time in seconds
        memory_limit: Memory limit in bytes (0 = unlimited)
        allow_network: Whether to allow network access
    """

    trust_level: str = "untrusted"
    venv_path: Optional[Path] = None
    # Shared pip wheel cache so repeated venv builds avoid re-downloading.
    wheel_cache: Path = field(default_factory=lambda: Path("/var/cache/artdag/wheels"))
    timeout: int = 3600  # 1 hour default
    # NOTE(review): memory_limit and allow_network are not consulted by
    # Sandbox._run_with_bwrap / _run_subprocess in this file (bwrap always
    # passes --unshare-net) -- confirm whether enforcement happens elsewhere.
    memory_limit: int = 0
    allow_network: bool = False
def is_bwrap_available() -> bool:
    """Return True when the bubblewrap binary is on PATH and runs."""
    try:
        proc = subprocess.run(
            ["bwrap", "--version"],
            capture_output=True,
            text=True,
        )
    except FileNotFoundError:
        return False
    return proc.returncode == 0
def get_venv_path(dependencies: List[str], cache_dir: Optional[Path] = None) -> Path:
    """
    Get or create a virtual environment for the given dependencies.

    Uses a SHA3-256 hash of the sorted, lower-cased dependency list as the
    cache key, so identical dependency sets share one venv.

    Args:
        dependencies: List of pip package specifiers
        cache_dir: Base directory for venv cache (default /var/cache/artdag/venvs)

    Returns:
        Path to venv directory

    Raises:
        subprocess.CalledProcessError: If venv creation or pip install fails.
    """
    cache_dir = cache_dir or Path("/var/cache/artdag/venvs")
    cache_dir.mkdir(parents=True, exist_ok=True)
    # Compute deps hash; 16 hex chars is plenty for a local cache key.
    sorted_deps = sorted(dep.lower().strip() for dep in dependencies)
    deps_str = "\n".join(sorted_deps)
    deps_hash = hashlib.sha3_256(deps_str.encode()).hexdigest()[:16]
    venv_path = cache_dir / deps_hash
    if venv_path.exists():
        logger.debug(f"Reusing venv at {venv_path}")
        return venv_path
    # Create new venv
    # NOTE(review): a partially-built venv left behind by a crashed install
    # would be reused as-is on the next call -- confirm that is acceptable.
    logger.info(f"Creating venv for {len(dependencies)} deps at {venv_path}")
    subprocess.run(
        ["python", "-m", "venv", str(venv_path)],
        check=True,
    )
    # Install dependencies through the shared wheel cache.
    pip_path = venv_path / "bin" / "pip"
    wheel_cache = Path("/var/cache/artdag/wheels")
    if dependencies:
        cmd = [
            str(pip_path),
            "install",
            "--cache-dir", str(wheel_cache),
            *dependencies,
        ]
        subprocess.run(cmd, check=True)
    return venv_path
@dataclass
class SandboxResult:
    """Result of sandboxed execution.

    Attributes:
        success: True when the effect exited 0 and produced its output file
        output_path: Path to the produced output (None on failure)
        stderr: Captured stderr from the sandboxed process
        exit_code: Process exit code (-1 for timeout or launch errors)
        error: Human-readable error description (None on success)
    """

    success: bool
    output_path: Optional[Path] = None
    stderr: str = ""
    exit_code: int = 0
    error: Optional[str] = None
class Sandbox:
    """
    Sandboxed effect execution environment.

    Uses bubblewrap for namespace isolation when available,
    falls back to subprocess with restricted permissions.

    Intended use is as a context manager so temp directories are
    cleaned up automatically::

        with Sandbox(config) as sandbox:
            result = sandbox.run_effect(...)
    """

    def __init__(self, config: Optional[SandboxConfig] = None):
        # Default config: untrusted, 1h timeout, no venv.
        self.config = config or SandboxConfig()
        # Temp dirs created for runs; removed in cleanup().
        self._temp_dirs: List[Path] = []

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()

    def cleanup(self):
        """Clean up temporary directories."""
        for temp_dir in self._temp_dirs:
            if temp_dir.exists():
                shutil.rmtree(temp_dir, ignore_errors=True)
        self._temp_dirs = []

    def _create_temp_dir(self) -> Path:
        """Create a temporary directory for sandbox use (tracked for cleanup)."""
        temp_dir = Path(tempfile.mkdtemp(prefix="artdag_sandbox_"))
        self._temp_dirs.append(temp_dir)
        return temp_dir

    def run_effect(
        self,
        effect_path: Path,
        input_paths: List[Path],
        output_path: Path,
        params: Dict[str, Any],
        bindings: Optional[Dict[str, List[float]]] = None,
        seed: int = 0,
    ) -> SandboxResult:
        """
        Run an effect in the sandbox.

        Args:
            effect_path: Path to effect.py
            input_paths: List of input file paths
            output_path: Output file path
            params: Effect parameters
            bindings: Per-frame parameter bindings
            seed: RNG seed for determinism

        Returns:
            SandboxResult with success status and output
        """
        bindings = bindings or {}
        # Create work directory
        work_dir = self._create_temp_dir()
        config_path = work_dir / "config.json"
        effect_copy = work_dir / "effect.py"
        # Copy effect to work dir
        shutil.copy(effect_path, effect_copy)
        # Write config file consumed by the runner script inside the sandbox.
        config_data = {
            "input_paths": [str(p) for p in input_paths],
            "output_path": str(output_path),
            "params": params,
            "bindings": bindings,
            "seed": seed,
        }
        config_path.write_text(json.dumps(config_data))
        # bwrap isolation only for untrusted effects; trusted ones (or hosts
        # without bwrap) use a plain subprocess.
        if is_bwrap_available() and self.config.trust_level == "untrusted":
            return self._run_with_bwrap(
                effect_copy, config_path, input_paths, output_path, work_dir
            )
        else:
            return self._run_subprocess(
                effect_copy, config_path, input_paths, output_path, work_dir
            )

    def _run_with_bwrap(
        self,
        effect_path: Path,
        config_path: Path,
        input_paths: List[Path],
        output_path: Path,
        work_dir: Path,
    ) -> SandboxResult:
        """Run effect with bubblewrap isolation (no network, fresh PID ns)."""
        logger.info("Running effect in bwrap sandbox")
        # Build bwrap command
        cmd = [
            "bwrap",
            # New PID namespace
            "--unshare-pid",
            # No network
            "--unshare-net",
            # Read-only root filesystem
            "--ro-bind", "/", "/",
            # Read-write work directory
            "--bind", str(work_dir), str(work_dir),
            # Read-only input files
        ]
        for input_path in input_paths:
            cmd.extend(["--ro-bind", str(input_path), str(input_path)])
        # Bind output directory as writable
        output_dir = output_path.parent
        output_dir.mkdir(parents=True, exist_ok=True)
        cmd.extend(["--bind", str(output_dir), str(output_dir)])
        # Bind venv if available; otherwise fall back to the system python.
        if self.config.venv_path and self.config.venv_path.exists():
            cmd.extend(["--ro-bind", str(self.config.venv_path), str(self.config.venv_path)])
            python_path = self.config.venv_path / "bin" / "python"
        else:
            python_path = Path("/usr/bin/python3")
        # Add runner script
        runner_script = self._get_runner_script()
        runner_path = work_dir / "runner.py"
        runner_path.write_text(runner_script)
        # Run the effect
        cmd.extend([
            str(python_path),
            str(runner_path),
            str(effect_path),
            str(config_path),
        ])
        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=self.config.timeout,
            )
            # Success requires both a zero exit code AND the output file.
            if result.returncode == 0 and output_path.exists():
                return SandboxResult(
                    success=True,
                    output_path=output_path,
                    stderr=result.stderr,
                    exit_code=0,
                )
            else:
                return SandboxResult(
                    success=False,
                    stderr=result.stderr,
                    exit_code=result.returncode,
                    error=result.stderr or "Effect execution failed",
                )
        except subprocess.TimeoutExpired:
            return SandboxResult(
                success=False,
                error=f"Effect timed out after {self.config.timeout}s",
                exit_code=-1,
            )
        except Exception as e:
            # Launch failures (missing bwrap binary race, OS errors, ...).
            return SandboxResult(
                success=False,
                error=str(e),
                exit_code=-1,
            )

    def _run_subprocess(
        self,
        effect_path: Path,
        config_path: Path,
        input_paths: List[Path],
        output_path: Path,
        work_dir: Path,
    ) -> SandboxResult:
        """Run effect in a plain subprocess (fallback without bwrap isolation)."""
        logger.warning("Running effect without sandbox isolation")
        # Create runner script
        runner_script = self._get_runner_script()
        runner_path = work_dir / "runner.py"
        runner_path.write_text(runner_script)
        # Determine Python path (venv interpreter if one was provisioned).
        if self.config.venv_path and self.config.venv_path.exists():
            python_path = self.config.venv_path / "bin" / "python"
        else:
            python_path = "python3"
        cmd = [
            str(python_path),
            str(runner_path),
            str(effect_path),
            str(config_path),
        ]
        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=self.config.timeout,
                cwd=str(work_dir),
            )
            # Same success criterion as the bwrap path.
            if result.returncode == 0 and output_path.exists():
                return SandboxResult(
                    success=True,
                    output_path=output_path,
                    stderr=result.stderr,
                    exit_code=0,
                )
            else:
                return SandboxResult(
                    success=False,
                    stderr=result.stderr,
                    exit_code=result.returncode,
                    error=result.stderr or "Effect execution failed",
                )
        except subprocess.TimeoutExpired:
            return SandboxResult(
                success=False,
                error=f"Effect timed out after {self.config.timeout}s",
                exit_code=-1,
            )
        except Exception as e:
            return SandboxResult(
                success=False,
                error=str(e),
                exit_code=-1,
            )

    def _get_runner_script(self) -> str:
        """Return the runner script executed inside the sandbox.

        The script loads config.json, imports the effect module and invokes
        its process()/process_frame() entry point.
        """
        return '''#!/usr/bin/env python3
"""Effect runner script - executed in sandbox."""
import importlib.util
import json
import sys
from pathlib import Path
def load_effect(effect_path):
    """Load effect module from path."""
    spec = importlib.util.spec_from_file_location("effect", effect_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
def main():
    if len(sys.argv) < 3:
        print("Usage: runner.py <effect_path> <config_path>", file=sys.stderr)
        sys.exit(1)
    effect_path = Path(sys.argv[1])
    config_path = Path(sys.argv[2])
    # Load config
    config = json.loads(config_path.read_text())
    input_paths = [Path(p) for p in config["input_paths"]]
    output_path = Path(config["output_path"])
    params = config["params"]
    bindings = config.get("bindings", {})
    seed = config.get("seed", 0)
    # Load effect
    effect = load_effect(effect_path)
    # Check API type
    if hasattr(effect, "process"):
        # Whole-video API
        from artdag.effects.meta import ExecutionContext
        ctx = ExecutionContext(
            input_paths=[str(p) for p in input_paths],
            output_path=str(output_path),
            params=params,
            seed=seed,
            bindings=bindings,
        )
        effect.process(input_paths, output_path, params, ctx)
    elif hasattr(effect, "process_frame"):
        # Frame-by-frame API
        from artdag.effects.frame_processor import process_video
        result_path, _ = process_video(
            input_path=input_paths[0],
            output_path=output_path,
            process_frame=effect.process_frame,
            params=params,
            bindings=bindings,
        )
    else:
        print("Effect must have process() or process_frame()", file=sys.stderr)
        sys.exit(1)
    print(f"Success: {output_path}", file=sys.stderr)
if __name__ == "__main__":
    main()
'''

246
core/artdag/engine.py Normal file
View File

@@ -0,0 +1,246 @@
# primitive/engine.py
"""
DAG execution engine.
Executes DAGs by:
1. Resolving nodes in topological order
2. Checking cache for each node
3. Running executors for cache misses
4. Storing results in cache
"""
import logging
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional
from .dag import DAG, Node, NodeType
from .cache import Cache
from .executor import Executor, get_executor
logger = logging.getLogger(__name__)
@dataclass
class ExecutionResult:
    """Result of executing a DAG.

    Attributes:
        success: True when every node produced a result
        output_path: Final output path (None on failure)
        error: Human-readable failure description (None on success)
        execution_time: Wall-clock duration of the run in seconds
        nodes_executed: Count of nodes actually run by an executor
        nodes_cached: Count of nodes satisfied from cache
        node_results: node_id -> produced/cached output path
    """

    success: bool
    output_path: Optional[Path] = None
    error: Optional[str] = None
    execution_time: float = 0.0
    nodes_executed: int = 0
    nodes_cached: int = 0
    node_results: Dict[str, Path] = field(default_factory=dict)
@dataclass
class NodeProgress:
    """Progress update for a single node, delivered to the progress callback.

    Attributes:
        node_id: Content-addressed node identifier
        node_type: Node type name (e.g. "SOURCE", "MUX")
        status: One of "pending", "running", "cached", "completed", "failed"
        progress: Completion fraction, 0.0 to 1.0
        message: Human-readable status message
    """

    node_id: str
    node_type: str
    status: str  # "pending", "running", "cached", "completed", "failed"
    progress: float = 0.0  # 0.0 to 1.0
    message: str = ""
# Progress callback type
ProgressCallback = Callable[[NodeProgress], None]
class Engine:
"""
DAG execution engine.
Manages cache, resolves dependencies, and runs executors.
"""
    def __init__(self, cache_dir: Path | str):
        """Create an engine backed by a Cache rooted at *cache_dir*."""
        self.cache = Cache(cache_dir)
        # Optional observer for per-node progress; set via set_progress_callback.
        self._progress_callback: Optional[ProgressCallback] = None
    def set_progress_callback(self, callback: ProgressCallback) -> None:
        """Set callback invoked with a NodeProgress for each status change."""
        self._progress_callback = callback
    def _report_progress(self, progress: NodeProgress) -> None:
        """Report progress to the callback if one is set."""
        if self._progress_callback:
            try:
                self._progress_callback(progress)
            except Exception as e:
                # A faulty callback must not abort DAG execution; log and continue.
                logger.warning(f"Progress callback error: {e}")
def execute(self, dag: DAG) -> ExecutionResult:
"""
Execute a DAG and return the result.
Args:
dag: The DAG to execute
Returns:
ExecutionResult with output path or error
"""
start_time = time.time()
node_results: Dict[str, Path] = {}
nodes_executed = 0
nodes_cached = 0
# Validate DAG
errors = dag.validate()
if errors:
return ExecutionResult(
success=False,
error=f"Invalid DAG: {errors}",
execution_time=time.time() - start_time,
)
# Get topological order
try:
order = dag.topological_order()
except Exception as e:
return ExecutionResult(
success=False,
error=f"Failed to order DAG: {e}",
execution_time=time.time() - start_time,
)
# Execute each node
for node_id in order:
node = dag.get_node(node_id)
type_str = node.node_type.name if isinstance(node.node_type, NodeType) else str(node.node_type)
# Report starting
self._report_progress(NodeProgress(
node_id=node_id,
node_type=type_str,
status="pending",
message=f"Processing {type_str}",
))
# Check cache first
cached_path = self.cache.get(node_id)
if cached_path is not None:
node_results[node_id] = cached_path
nodes_cached += 1
self._report_progress(NodeProgress(
node_id=node_id,
node_type=type_str,
status="cached",
progress=1.0,
message="Using cached result",
))
continue
# Get executor
executor = get_executor(node.node_type)
if executor is None:
return ExecutionResult(
success=False,
error=f"No executor for node type: {node.node_type}",
execution_time=time.time() - start_time,
node_results=node_results,
)
# Resolve input paths
input_paths = []
for input_id in node.inputs:
if input_id not in node_results:
return ExecutionResult(
success=False,
error=f"Missing input {input_id} for node {node_id}",
execution_time=time.time() - start_time,
node_results=node_results,
)
input_paths.append(node_results[input_id])
# Determine output path
output_path = self.cache.get_output_path(node_id, ".mkv")
# Execute
self._report_progress(NodeProgress(
node_id=node_id,
node_type=type_str,
status="running",
progress=0.5,
message=f"Executing {type_str}",
))
node_start = time.time()
try:
result_path = executor.execute(
config=node.config,
inputs=input_paths,
output_path=output_path,
)
node_time = time.time() - node_start
# Store in cache (file is already at output_path)
self.cache.put(
node_id=node_id,
source_path=result_path,
node_type=type_str,
execution_time=node_time,
move=False, # Already in place
)
node_results[node_id] = result_path
nodes_executed += 1
self._report_progress(NodeProgress(
node_id=node_id,
node_type=type_str,
status="completed",
progress=1.0,
message=f"Completed in {node_time:.2f}s",
))
except Exception as e:
logger.error(f"Node {node_id} failed: {e}")
self._report_progress(NodeProgress(
node_id=node_id,
node_type=type_str,
status="failed",
message=str(e),
))
return ExecutionResult(
success=False,
error=f"Node {node_id} ({type_str}) failed: {e}",
execution_time=time.time() - start_time,
node_results=node_results,
nodes_executed=nodes_executed,
nodes_cached=nodes_cached,
)
# Get final output
output_path = node_results.get(dag.output_id)
return ExecutionResult(
success=True,
output_path=output_path,
execution_time=time.time() - start_time,
nodes_executed=nodes_executed,
nodes_cached=nodes_cached,
node_results=node_results,
)
def execute_node(self, node: Node, inputs: List[Path]) -> Path:
"""
Execute a single node (bypassing DAG structure).
Useful for testing individual executors.
"""
executor = get_executor(node.node_type)
if executor is None:
raise ValueError(f"No executor for node type: {node.node_type}")
output_path = self.cache.get_output_path(node.node_id, ".mkv")
return executor.execute(node.config, inputs, output_path)
def get_cache_stats(self):
"""Get cache statistics."""
return self.cache.get_stats()
def clear_cache(self):
"""Clear the cache."""
self.cache.clear()

106
core/artdag/executor.py Normal file
View File

@@ -0,0 +1,106 @@
# primitive/executor.py
"""
Executor base class and registry.
Executors implement the actual operations for each node type.
They are registered by node type and looked up during execution.
"""
import logging
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Type
from .dag import NodeType
logger = logging.getLogger(__name__)
# Global executor registry
_EXECUTORS: Dict[NodeType | str, Type["Executor"]] = {}
class Executor(ABC):
    """
    Abstract base for node executors.

    A concrete executor performs the real work for one node type; it is
    looked up from the registry at execution time and instantiated fresh
    for each node.
    """
    @abstractmethod
    def execute(
        self,
        config: Dict[str, Any],
        inputs: List[Path],
        output_path: Path,
    ) -> Path:
        """
        Perform the node's operation.

        Args:
            config: Node configuration
            inputs: Paths to input files (from resolved input nodes)
            output_path: Where to write the output

        Returns:
            Path to the produced output file.
        """
        pass
    def validate_config(self, config: Dict[str, Any]) -> List[str]:
        """
        Check a node configuration for problems.

        Returns a list of error strings; an empty list means the config is
        acceptable. Subclasses override this to enforce their own schema.
        """
        return []
    def estimate_output_size(self, config: Dict[str, Any], input_sizes: List[int]) -> int:
        """
        Rough output-size estimate in bytes.

        The default simply totals the input sizes; subclasses can provide
        a sharper estimate.
        """
        if not input_sizes:
            return 0
        return sum(input_sizes)
def register_executor(node_type: NodeType | str) -> Callable:
    """
    Class decorator that binds an executor to a node type.

    Usage:
        @register_executor(NodeType.SOURCE)
        class SourceExecutor(Executor):
            ...

    Re-registering a type replaces the previous executor (with a warning).
    """
    def _bind(cls: Type[Executor]) -> Type[Executor]:
        if node_type in _EXECUTORS:
            logger.warning(f"Overwriting executor for {node_type}")
        _EXECUTORS[node_type] = cls
        return cls
    return _bind
def get_executor(node_type: NodeType | str) -> Optional[Executor]:
    """
    Instantiate the executor registered for a node type.

    Returns None when nothing is registered for that type.
    """
    cls = _EXECUTORS.get(node_type)
    return None if cls is None else cls()
def list_executors() -> Dict[str, Type[Executor]]:
    """Return a mapping of node-type name -> registered executor class."""
    listing: Dict[str, Type[Executor]] = {}
    for key, cls in _EXECUTORS.items():
        label = key.name if isinstance(key, NodeType) else key
        listing[label] = cls
    return listing
def clear_executors():
    """Drop every registered executor (test helper)."""
    _EXECUTORS.clear()

View File

@@ -0,0 +1,11 @@
# primitive/nodes/__init__.py
"""
Built-in node executors.
Import this module to register all built-in executors.
"""
from . import source
from . import transform
from . import compose
from . import effect

View File

@@ -0,0 +1,548 @@
# primitive/nodes/compose.py
"""
Compose executors: Combine multiple media inputs.
Primitives: SEQUENCE, LAYER, MUX, BLEND
"""
import logging
import shutil
import subprocess
from pathlib import Path
from typing import Any, Dict, List
from ..dag import NodeType
from ..executor import Executor, register_executor
from .encoding import WEB_ENCODING_ARGS_STR, get_web_encoding_args
logger = logging.getLogger(__name__)
def _get_duration(path: Path) -> float:
    """
    Return the media duration of *path* in seconds, via ffprobe.

    Raises:
        RuntimeError: if ffprobe exits non-zero or reports no duration.
            (Previously a failed probe fell through to float("") and
            raised an opaque ValueError.)
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "csv=p=0",
        str(path)
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed for {path}: {result.stderr.strip()}")
    raw = result.stdout.strip()
    if not raw:
        raise RuntimeError(f"ffprobe reported no duration for {path}")
    return float(raw)
def _get_video_info(path: Path) -> dict:
    """Probe a file for video width/height/fps and audio sample rate."""
    video_probe = subprocess.run(
        [
            "ffprobe", "-v", "error",
            "-select_streams", "v:0",
            "-show_entries", "stream=width,height,r_frame_rate",
            "-of", "csv=p=0",
            str(path),
        ],
        capture_output=True,
        text=True,
    )
    fields = video_probe.stdout.strip().split(",")
    # Fall back to 1920x1080 @ 30fps when the stream is missing/unparseable.
    width = int(fields[0]) if len(fields) > 0 and fields[0] else 1920
    height = int(fields[1]) if len(fields) > 1 and fields[1] else 1080
    rate_txt = fields[2] if len(fields) > 2 else "30/1"
    # Frame rate may be rational ("30000/1001") or a bare number.
    if "/" in rate_txt:
        num, den = rate_txt.split("/")
        fps = float(num) / float(den) if float(den) != 0 else 30
    else:
        fps = float(rate_txt) if rate_txt else 30
    audio_probe = subprocess.run(
        [
            "ffprobe", "-v", "error",
            "-select_streams", "a:0",
            "-show_entries", "stream=sample_rate",
            "-of", "csv=p=0",
            str(path),
        ],
        capture_output=True,
        text=True,
    )
    raw_sr = audio_probe.stdout.strip()
    sample_rate = int(raw_sr) if raw_sr else 44100
    return {"width": width, "height": height, "fps": fps, "sample_rate": sample_rate}
@register_executor(NodeType.SEQUENCE)
class SequenceExecutor(Executor):
    """
    Concatenate inputs in time order.
    Config:
        transition: Transition config
            type: "cut" | "crossfade" | "fade"
            duration: Transition duration in seconds
        target_size: How to determine output dimensions when inputs differ
            "first": Use first input's dimensions (default)
            "last": Use last input's dimensions
            "largest": Use largest width and height from all inputs
            "explicit": Use width/height config values
        width: Target width (when target_size="explicit")
        height: Target height (when target_size="explicit")
        background: Padding color for letterbox/pillarbox (default: "black")
    """
    def execute(
        self,
        config: Dict[str, Any],
        inputs: List[Path],
        output_path: Path,
    ) -> Path:
        """Dispatch to the transition-specific concatenation strategy."""
        if len(inputs) < 1:
            raise ValueError("SEQUENCE requires at least one input")
        if len(inputs) == 1:
            # Nothing to concatenate; pass the single clip through.
            output_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(inputs[0], output_path)
            return output_path
        transition = config.get("transition", {"type": "cut"})
        transition_type = transition.get("type", "cut")
        transition_duration = transition.get("duration", 0.5)
        # Size handling config
        target_size = config.get("target_size", "first")
        width = config.get("width")
        height = config.get("height")
        background = config.get("background", "black")
        if transition_type == "cut":
            return self._concat_cut(inputs, output_path, target_size, width, height, background)
        elif transition_type == "crossfade":
            return self._concat_crossfade(inputs, output_path, transition_duration)
        elif transition_type == "fade":
            return self._concat_fade(inputs, output_path, transition_duration)
        else:
            raise ValueError(f"Unknown transition type: {transition_type}")
    def _concat_cut(
        self,
        inputs: List[Path],
        output_path: Path,
        target_size: str = "first",
        width: Optional[int] = None,
        height: Optional[int] = None,
        background: str = "black",
    ) -> Path:
        """
        Concatenate with scaling/padding to handle different resolutions.
        Args:
            inputs: Input video paths
            output_path: Output path
            target_size: How to determine output size:
                - "first": Use first input's dimensions (default)
                - "last": Use last input's dimensions
                - "largest": Use largest dimensions from all inputs
                - "explicit": Use width/height params
            width: Explicit width (when target_size="explicit")
            height: Explicit height (when target_size="explicit")
            background: Padding color (default: black)
        Raises:
            RuntimeError: if ffmpeg fails.
        """
        output_path.parent.mkdir(parents=True, exist_ok=True)
        n = len(inputs)
        input_args = []
        for p in inputs:
            input_args.extend(["-i", str(p)])
        # Get video info for all inputs
        infos = [_get_video_info(p) for p in inputs]
        # Determine target dimensions
        if target_size == "explicit" and width and height:
            target_w, target_h = width, height
        elif target_size == "last":
            target_w, target_h = infos[-1]["width"], infos[-1]["height"]
        elif target_size == "largest":
            target_w = max(i["width"] for i in infos)
            target_h = max(i["height"] for i in infos)
        else:  # "first" or default
            target_w, target_h = infos[0]["width"], infos[0]["height"]
        # Use common frame rate (from first input) and highest sample rate
        target_fps = infos[0]["fps"]
        target_sr = max(i["sample_rate"] for i in infos)
        # Build filter for each input: scale to fit + pad to target size
        filter_parts = []
        for i in range(n):
            # Scale to fit within target, maintaining aspect ratio, then pad
            vf = (
                f"[{i}:v]scale={target_w}:{target_h}:force_original_aspect_ratio=decrease,"
                f"pad={target_w}:{target_h}:(ow-iw)/2:(oh-ih)/2:color={background},"
                f"setsar=1,fps={target_fps:.6f}[v{i}]"
            )
            # Resample audio to common rate
            af = f"[{i}:a]aresample={target_sr}[a{i}]"
            filter_parts.append(vf)
            filter_parts.append(af)
        # Build concat filter
        stream_labels = "".join(f"[v{i}][a{i}]" for i in range(n))
        filter_parts.append(f"{stream_labels}concat=n={n}:v=1:a=1[outv][outa]")
        filter_complex = ";".join(filter_parts)
        cmd = [
            "ffmpeg", "-y",
            *input_args,
            "-filter_complex", filter_complex,
            "-map", "[outv]",
            "-map", "[outa]",
            *get_web_encoding_args(),
            str(output_path)
        ]
        logger.debug(f"SEQUENCE cut: {n} clips -> {target_w}x{target_h} (web-optimized)")
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Concat failed: {result.stderr}")
        return output_path
    def _concat_crossfade(
        self,
        inputs: List[Path],
        output_path: Path,
        duration: float,
    ) -> Path:
        """
        Concatenate with crossfade transitions.

        Falls back to a plain cut concat if the xfade graph fails.
        Note: runs ffmpeg with an argument list (no shell), so paths with
        spaces or shell metacharacters are safe.
        """
        output_path.parent.mkdir(parents=True, exist_ok=True)
        durations = [_get_duration(p) for p in inputs]
        n = len(inputs)
        # Build xfade filter chain
        filter_parts = []
        current = "[0:v]"
        for i in range(1, n):
            # Each transition starts `duration` before the accumulated end.
            offset = sum(durations[:i]) - duration * i
            next_input = f"[{i}:v]"
            output_label = f"[v{i}]" if i < n - 1 else "[outv]"
            filter_parts.append(
                f"{current}{next_input}xfade=transition=fade:duration={duration}:offset={offset}{output_label}"
            )
            current = output_label
        # Audio crossfade chain
        audio_current = "[0:a]"
        for i in range(1, n):
            next_input = f"[{i}:a]"
            output_label = f"[a{i}]" if i < n - 1 else "[outa]"
            filter_parts.append(
                f"{audio_current}{next_input}acrossfade=d={duration}{output_label}"
            )
            audio_current = output_label
        filter_complex = ";".join(filter_parts)
        cmd = ["ffmpeg", "-y"]
        for p in inputs:
            cmd.extend(["-i", str(p)])
        cmd.extend([
            "-filter_complex", filter_complex,
            "-map", "[outv]",
            "-map", "[outa]",
            *get_web_encoding_args(),
            str(output_path),
        ])
        logger.debug(f"SEQUENCE crossfade: {n} clips (web-optimized)")
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            logger.warning(f"Crossfade failed, falling back to cut: {result.stderr[:200]}")
            return self._concat_cut(inputs, output_path)
        return output_path
    def _concat_fade(
        self,
        inputs: List[Path],
        output_path: Path,
        duration: float,
    ) -> Path:
        """
        Concatenate with fade out/in transitions.

        Renders a faded intermediate per clip, then cut-concats them.
        Intermediates are removed even when concatenation fails.
        """
        output_path.parent.mkdir(parents=True, exist_ok=True)
        faded_paths: List[Path] = []
        try:
            for i, path in enumerate(inputs):
                clip_dur = _get_duration(path)
                faded_path = output_path.parent / f"_faded_{i}.mkv"
                cmd = [
                    "ffmpeg", "-y",
                    "-i", str(path),
                    "-vf", f"fade=in:st=0:d={duration},fade=out:st={clip_dur - duration}:d={duration}",
                    "-af", f"afade=in:st=0:d={duration},afade=out:st={clip_dur - duration}:d={duration}",
                    "-c:v", "libx264", "-preset", "ultrafast", "-crf", "18",
                    "-c:a", "aac",
                    str(faded_path)
                ]
                subprocess.run(cmd, capture_output=True, check=True)
                faded_paths.append(faded_path)
            return self._concat_cut(faded_paths, output_path)
        finally:
            # Clean up intermediates on success AND on failure (previously
            # a failed concat leaked the _faded_*.mkv files).
            for p in faded_paths:
                p.unlink(missing_ok=True)
@register_executor(NodeType.LAYER)
class LayerExecutor(Executor):
    """
    Layer inputs spatially (overlay/composite).
    Config:
        inputs: List of per-input configs
            position: [x, y] offset
            opacity: 0.0-1.0
            scale: Scale factor
    """
    def execute(
        self,
        config: Dict[str, Any],
        inputs: List[Path],
        output_path: Path,
    ) -> Path:
        """
        Overlay inputs bottom-up: input 0 is the base, each later input is
        composited on top with optional scale/opacity/position.

        Raises:
            ValueError: if no inputs are given.
            RuntimeError: if ffmpeg fails.
        """
        if len(inputs) < 1:
            raise ValueError("LAYER requires at least one input")
        if len(inputs) == 1:
            # Single layer: pass through unchanged.
            output_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(inputs[0], output_path)
            return output_path
        input_configs = config.get("inputs", [{}] * len(inputs))
        output_path.parent.mkdir(parents=True, exist_ok=True)
        n = len(inputs)
        filter_parts = []
        current = "[0:v]"
        for i in range(1, n):
            cfg = input_configs[i] if i < len(input_configs) else {}
            x, y = cfg.get("position", [0, 0])
            opacity = cfg.get("opacity", 1.0)
            scale = cfg.get("scale", 1.0)
            scale_label = f"[s{i}]"
            if scale != 1.0:
                filter_parts.append(f"[{i}:v]scale=iw*{scale}:ih*{scale}{scale_label}")
                overlay_input = scale_label
            else:
                overlay_input = f"[{i}:v]"
            output_label = f"[v{i}]" if i < n - 1 else "[outv]"
            if opacity < 1.0:
                # Apply opacity via alpha channel before overlaying.
                filter_parts.append(
                    f"{overlay_input}format=rgba,colorchannelmixer=aa={opacity}[a{i}]"
                )
                overlay_input = f"[a{i}]"
            filter_parts.append(
                f"{current}{overlay_input}overlay=x={x}:y={y}:format=auto{output_label}"
            )
            current = output_label
        filter_complex = ";".join(filter_parts)
        # Run ffmpeg with an argument list (no shell): previously this built
        # a shell string, which broke on paths with spaces and was an
        # injection risk for untrusted filenames.
        cmd = ["ffmpeg", "-y"]
        for p in inputs:
            cmd.extend(["-i", str(p)])
        cmd.extend([
            "-filter_complex", filter_complex,
            "-map", "[outv]",
            "-map", "0:a?",
            *get_web_encoding_args(),
            str(output_path),
        ])
        logger.debug(f"LAYER: {n} inputs (web-optimized)")
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Layer failed: {result.stderr}")
        return output_path
@register_executor(NodeType.MUX)
class MuxExecutor(Executor):
    """
    Combine video and audio streams.
    Config:
        video_stream: Index of video input (default: 0)
        audio_stream: Index of audio input (default: 1)
        shortest: End when shortest stream ends (default: True)
    """
    def execute(
        self,
        config: Dict[str, Any],
        inputs: List[Path],
        output_path: Path,
    ) -> Path:
        """Copy the video stream and re-encode audio into one container."""
        if len(inputs) < 2:
            raise ValueError("MUX requires at least 2 inputs (video + audio)")
        video_path = inputs[config.get("video_stream", 0)]
        audio_path = inputs[config.get("audio_stream", 1)]
        output_path.parent.mkdir(parents=True, exist_ok=True)
        mux_cmd = [
            "ffmpeg", "-y",
            "-i", str(video_path),
            "-i", str(audio_path),
            "-c:v", "copy",
            "-c:a", "aac",
            "-map", "0:v:0",
            "-map", "1:a:0",
        ]
        if config.get("shortest", True):
            mux_cmd.append("-shortest")
        mux_cmd.append(str(output_path))
        logger.debug(f"MUX: video={video_path.name} + audio={audio_path.name}")
        proc = subprocess.run(mux_cmd, capture_output=True, text=True)
        if proc.returncode != 0:
            raise RuntimeError(f"Mux failed: {proc.stderr}")
        return output_path
@register_executor(NodeType.BLEND)
class BlendExecutor(Executor):
    """
    Blend two inputs using a blend mode.
    Config:
        mode: Blend mode (multiply, screen, overlay, add, etc.)
        opacity: 0.0-1.0 for second input
    """
    BLEND_MODES = {
        "multiply": "multiply",
        "screen": "screen",
        "overlay": "overlay",
        "add": "addition",
        "subtract": "subtract",
        "average": "average",
        "difference": "difference",
        "lighten": "lighten",
        "darken": "darken",
    }
    def execute(
        self,
        config: Dict[str, Any],
        inputs: List[Path],
        output_path: Path,
    ) -> Path:
        """Blend input 1 onto input 0, keeping input 0's audio if present."""
        if len(inputs) != 2:
            raise ValueError("BLEND requires exactly 2 inputs")
        mode = config.get("mode", "overlay")
        opacity = config.get("opacity", 0.5)
        if mode not in self.BLEND_MODES:
            raise ValueError(f"Unknown blend mode: {mode}")
        output_path.parent.mkdir(parents=True, exist_ok=True)
        ffmpeg_mode = self.BLEND_MODES[mode]
        # Below full opacity, pre-fade the top layer's alpha before blending.
        if opacity < 1.0:
            graph = (
                f"[1:v]format=rgba,colorchannelmixer=aa={opacity}[b];"
                f"[0:v][b]blend=all_mode={ffmpeg_mode}"
            )
        else:
            graph = f"[0:v][1:v]blend=all_mode={ffmpeg_mode}"
        blend_cmd = [
            "ffmpeg", "-y",
            "-i", str(inputs[0]),
            "-i", str(inputs[1]),
            "-filter_complex", graph,
            "-map", "0:a?",
            *get_web_encoding_args(),
            str(output_path)
        ]
        logger.debug(f"BLEND: {mode} (opacity={opacity}) (web-optimized)")
        proc = subprocess.run(blend_cmd, capture_output=True, text=True)
        if proc.returncode != 0:
            raise RuntimeError(f"Blend failed: {proc.stderr}")
        return output_path
@register_executor(NodeType.AUDIO_MIX)
class AudioMixExecutor(Executor):
    """
    Mix multiple audio streams.
    Config:
        gains: List of gain values per input (0.0-2.0, default 1.0)
        normalize: Normalize output to prevent clipping (default True)
    """
    def execute(
        self,
        config: Dict[str, Any],
        inputs: List[Path],
        output_path: Path,
    ) -> Path:
        """
        Apply per-input volume, then amix all inputs into one AAC stream.

        Raises:
            ValueError: if fewer than 2 inputs are given.
            RuntimeError: if ffmpeg fails.
        """
        if len(inputs) < 2:
            raise ValueError("AUDIO_MIX requires at least 2 inputs")
        # Copy the gains list: the previous implementation appended to the
        # list inside the caller's config dict, mutating shared state.
        gains = list(config.get("gains", [1.0] * len(inputs)))
        normalize = config.get("normalize", True)
        # Pad gains list if too short
        if len(gains) < len(inputs):
            gains.extend([1.0] * (len(inputs) - len(gains)))
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Build filter: apply volume to each input, then mix
        filter_parts = []
        mix_inputs = []
        for i, gain in enumerate(gains[:len(inputs)]):
            if gain != 1.0:
                filter_parts.append(f"[{i}:a]volume={gain}[a{i}]")
                mix_inputs.append(f"[a{i}]")
            else:
                # Unity gain: feed the stream straight into amix.
                mix_inputs.append(f"[{i}:a]")
        # amix filter
        normalize_flag = 1 if normalize else 0
        mix_filter = f"{''.join(mix_inputs)}amix=inputs={len(inputs)}:normalize={normalize_flag}[aout]"
        filter_parts.append(mix_filter)
        filter_complex = ";".join(filter_parts)
        cmd = [
            "ffmpeg", "-y",
        ]
        for p in inputs:
            cmd.extend(["-i", str(p)])
        cmd.extend([
            "-filter_complex", filter_complex,
            "-map", "[aout]",
            "-c:a", "aac",
            str(output_path)
        ])
        logger.debug(f"AUDIO_MIX: {len(inputs)} inputs, gains={gains[:len(inputs)]}")
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Audio mix failed: {result.stderr}")
        return output_path

520
core/artdag/nodes/effect.py Normal file
View File

@@ -0,0 +1,520 @@
# artdag/nodes/effect.py
"""
Effect executor: Apply effects from the registry or IPFS.
Primitives: EFFECT
Effects can be:
1. Built-in (registered with @register_effect)
2. Stored in IPFS (referenced by CID)
"""
import importlib.util
import logging
import os
import re
import shutil
import tempfile
from pathlib import Path
from types import ModuleType
from typing import Any, Callable, Dict, List, Optional, TypeVar
import requests
from ..executor import Executor, register_executor
logger = logging.getLogger(__name__)
# Type alias for effect functions: (input_path, output_path, config) -> output_path
EffectFn = Callable[[Path, Path, Dict[str, Any]], Path]
# Type variable for decorator
F = TypeVar("F", bound=Callable[..., Any])
# IPFS API multiaddr - same as ipfs_client.py for consistency
# Docker uses /dns/ipfs/tcp/5001, local dev uses /ip4/127.0.0.1/tcp/5001
IPFS_API = os.environ.get("IPFS_API", "/ip4/127.0.0.1/tcp/5001")
# Connection timeout in seconds
IPFS_TIMEOUT = int(os.environ.get("IPFS_TIMEOUT", "30"))
def _get_ipfs_base_url() -> str:
"""
Convert IPFS multiaddr to HTTP URL.
Matches the conversion logic in ipfs_client.py for consistency.
"""
multiaddr = IPFS_API
# Handle /dns/hostname/tcp/port format (Docker)
dns_match = re.match(r"/dns[46]?/([^/]+)/tcp/(\d+)", multiaddr)
if dns_match:
return f"http://{dns_match.group(1)}:{dns_match.group(2)}"
# Handle /ip4/address/tcp/port format (local)
ip4_match = re.match(r"/ip4/([^/]+)/tcp/(\d+)", multiaddr)
if ip4_match:
return f"http://{ip4_match.group(1)}:{ip4_match.group(2)}"
# Fallback: assume it's already a URL or use default
if multiaddr.startswith("http"):
return multiaddr
return "http://127.0.0.1:5001"
def _get_effects_cache_dir() -> Optional[Path]:
    """
    Locate an existing effects cache directory.

    Checks env-configured cache roots first (CACHE_DIR is used by
    art-celery, ARTDAG_CACHE_DIR by standalone runs), then the default
    locations. Returns None when no "_effects" directory exists yet.
    """
    candidates = []
    for env_var in ("CACHE_DIR", "ARTDAG_CACHE_DIR"):
        root = os.environ.get(env_var)
        if root:
            candidates.append(Path(root) / "_effects")
    candidates.append(Path.home() / ".artdag" / "cache" / "_effects")
    candidates.append(Path("/var/cache/artdag") / "_effects")
    for directory in candidates:
        if directory.exists():
            return directory
    return None
def _fetch_effect_from_ipfs(cid: str, effect_path: Path) -> bool:
    """
    Download an effect's source from IPFS and write it to the local cache.

    Goes through the IPFS API endpoint (/api/v0/cat), the same route
    ipfs_client.py uses, which also works in Docker where
    IPFS_API=/dns/ipfs/tcp/5001.

    Returns True on success, False on any failure (logged, not raised).
    """
    try:
        endpoint = f"{_get_ipfs_base_url()}/api/v0/cat"
        response = requests.post(endpoint, params={"arg": cid}, timeout=IPFS_TIMEOUT)
        response.raise_for_status()
        # Persist to the local effects cache.
        effect_path.parent.mkdir(parents=True, exist_ok=True)
        effect_path.write_bytes(response.content)
        logger.info(f"Fetched effect from IPFS: {cid[:16]}...")
        return True
    except Exception as e:
        logger.error(f"Failed to fetch effect from IPFS {cid[:16]}...: {e}")
        return False
def _parse_pep723_dependencies(source: str) -> List[str]:
"""
Parse PEP 723 dependencies from effect source code.
Returns list of package names (e.g., ["numpy", "opencv-python"]).
"""
match = re.search(r"# /// script\n(.*?)# ///", source, re.DOTALL)
if not match:
return []
block = match.group(1)
deps_match = re.search(r'# dependencies = \[(.*?)\]', block, re.DOTALL)
if not deps_match:
return []
return re.findall(r'"([^"]+)"', deps_match.group(1))
def _ensure_dependencies(dependencies: List[str], effect_cid: str) -> bool:
"""
Ensure effect dependencies are installed.
Installs missing packages using pip. Returns True on success.
"""
if not dependencies:
return True
missing = []
for dep in dependencies:
# Extract package name (strip version specifiers)
pkg_name = re.split(r'[<>=!]', dep)[0].strip()
# Normalize name (pip uses underscores, imports use underscores or hyphens)
pkg_name_normalized = pkg_name.replace('-', '_').lower()
try:
__import__(pkg_name_normalized)
except ImportError:
# Some packages have different import names
try:
# Try original name with hyphens replaced
__import__(pkg_name.replace('-', '_'))
except ImportError:
missing.append(dep)
if not missing:
return True
logger.info(f"Installing effect dependencies for {effect_cid[:16]}...: {missing}")
try:
import subprocess
import sys
result = subprocess.run(
[sys.executable, "-m", "pip", "install", "--quiet"] + missing,
capture_output=True,
text=True,
timeout=120,
)
if result.returncode != 0:
logger.error(f"Failed to install dependencies: {result.stderr}")
return False
logger.info(f"Installed dependencies: {missing}")
return True
except Exception as e:
logger.error(f"Error installing dependencies: {e}")
return False
def _load_cached_effect(effect_cid: str) -> Optional[EffectFn]:
    """
    Load an effect by CID, fetching from IPFS if not cached locally.
    Returns the effect function or None if not found.

    Pipeline: resolve (or create) the effects cache dir -> fetch from IPFS
    on a local miss -> install PEP 723 dependencies -> import the module
    and adapt whichever API it exposes (process_frame / process / effect).
    """
    effects_dir = _get_effects_cache_dir()
    # Create cache dir if needed
    if not effects_dir:
        # Try to create default cache dir
        for env_var in ["CACHE_DIR", "ARTDAG_CACHE_DIR"]:
            cache_dir = os.environ.get(env_var)
            if cache_dir:
                effects_dir = Path(cache_dir) / "_effects"
                effects_dir.mkdir(parents=True, exist_ok=True)
                break
        if not effects_dir:
            effects_dir = Path.home() / ".artdag" / "cache" / "_effects"
            effects_dir.mkdir(parents=True, exist_ok=True)
    # Effects are cached one directory per CID.
    effect_path = effects_dir / effect_cid / "effect.py"
    # If not cached locally, fetch from IPFS
    if not effect_path.exists():
        if not _fetch_effect_from_ipfs(effect_cid, effect_path):
            logger.warning(f"Effect not found: {effect_cid[:16]}...")
            return None
    # Parse and install dependencies before loading
    try:
        source = effect_path.read_text()
        dependencies = _parse_pep723_dependencies(source)
        if dependencies:
            logger.info(f"Effect {effect_cid[:16]}... requires: {dependencies}")
            if not _ensure_dependencies(dependencies, effect_cid):
                logger.error(f"Failed to install dependencies for effect {effect_cid[:16]}...")
                return None
    except Exception as e:
        logger.error(f"Error parsing effect dependencies: {e}")
        # Continue anyway - the effect might work without the deps check
    # Load the effect module
    try:
        spec = importlib.util.spec_from_file_location("cached_effect", effect_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        # Check for frame-by-frame API
        if hasattr(module, "process_frame"):
            return _wrap_frame_effect(module, effect_path)
        # Check for whole-video API
        if hasattr(module, "process"):
            return _wrap_video_effect(module)
        # Check for old-style effect function
        if hasattr(module, "effect"):
            return module.effect
        logger.warning(f"Effect has no recognized API: {effect_cid[:16]}...")
        return None
    except Exception as e:
        logger.error(f"Failed to load effect {effect_cid[:16]}...: {e}")
        return None
def _wrap_frame_effect(module: ModuleType, effect_path: Path) -> EffectFn:
    """Adapt a frame-by-frame effect module to the executor's EffectFn API."""
    def wrapped_effect(input_path: Path, output_path: Path, config: Dict[str, Any]) -> Path:
        """Run frame-by-frame effect through FFmpeg pipes."""
        try:
            from ..effects.frame_processor import process_video
        except ImportError:
            logger.error("Frame processor not available - falling back to copy")
            shutil.copy2(input_path, output_path)
            return output_path
        # Strip internal bookkeeping keys; everything else is an effect param.
        params = {
            key: val for key, val in config.items()
            if key not in ("effect", "hash", "_binding")
        }
        # Collect pre-resolved parameter bindings, if any.
        bindings = {
            key: val["_resolved_values"]
            for key, val in config.items()
            if isinstance(val, dict) and val.get("_resolved_values")
        }
        output_path.parent.mkdir(parents=True, exist_ok=True)
        actual_output = output_path.with_suffix(".mp4")
        process_video(
            input_path=input_path,
            output_path=actual_output,
            process_frame=module.process_frame,
            params=params,
            bindings=bindings,
        )
        return actual_output
    return wrapped_effect
def _wrap_video_effect(module: ModuleType) -> EffectFn:
    """Adapt a whole-video effect module to the executor's EffectFn API."""
    def wrapped_effect(input_path: Path, output_path: Path, config: Dict[str, Any]) -> Path:
        """Run whole-video effect."""
        from ..effects.meta import ExecutionContext
        # Strip internal bookkeeping keys; everything else is an effect param.
        params = {
            key: val for key, val in config.items()
            if key not in ("effect", "hash", "_binding")
        }
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Seed derived from the input path keeps runs deterministic per input.
        ctx = ExecutionContext(
            input_paths=[str(input_path)],
            output_path=str(output_path),
            params=params,
            seed=hash(str(input_path)) & 0xFFFFFFFF,
        )
        module.process([input_path], output_path, params, ctx)
        return output_path
    return wrapped_effect
# Effect registry - maps effect names to implementations
_EFFECTS: Dict[str, EffectFn] = {}
def register_effect(name: str) -> Callable[[F], F]:
    """Decorator that stores an effect implementation under *name*."""
    def _store(func: F) -> F:
        _EFFECTS[name] = func  # type: ignore[assignment]
        return func
    return _store
def get_effect(name: str) -> Optional[EffectFn]:
    """Look up a registered effect implementation; None when unknown."""
    return _EFFECTS.get(name)
# Built-in effects
@register_effect("identity")
def effect_identity(input_path: Path, output_path: Path, config: Dict[str, Any]) -> Path:
    """
    Identity effect - returns input unchanged.
    This is the foundational effect: identity(x) = x

    Implemented as a symlink to the input, so no bytes are copied. The
    actual output keeps the input's file extension.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    actual_output = output_path.with_suffix(input_path.suffix)
    # Clear any stale output (regular file or dangling symlink) first.
    for stale in (output_path, actual_output):
        if stale.exists() or stale.is_symlink():
            stale.unlink()
    # Zero-copy identity: link straight to the resolved input.
    actual_output.symlink_to(input_path.resolve())
    logger.debug(f"EFFECT identity: {input_path.name} -> {actual_output}")
    return actual_output
def _get_sexp_effect(effect_path: str, recipe_dir: Path = None) -> Optional[EffectFn]:
"""
Load a sexp effect from a .sexp file.
Args:
effect_path: Relative path to the .sexp effect file
recipe_dir: Base directory for resolving paths
Returns:
Effect function or None if not a sexp effect
"""
if not effect_path or not effect_path.endswith(".sexp"):
return None
try:
from ..sexp.effect_loader import SexpEffectLoader
except ImportError:
logger.warning("Sexp effect loader not available")
return None
try:
loader = SexpEffectLoader(recipe_dir or Path.cwd())
return loader.load_effect_path(effect_path)
except Exception as e:
logger.error(f"Failed to load sexp effect from {effect_path}: {e}")
return None
def _get_python_primitive_effect(effect_name: str) -> Optional[EffectFn]:
    """
    Get a Python primitive frame processor effect.

    Looks up `effect_name` via FFmpegCompiler.EFFECT_MAPPINGS; when it maps
    to a python_primitive, the primitive is wrapped so it conforms to the
    executor effect API (input_path, output_path, config) -> Path.
    """
    try:
        from ..sexp.ffmpeg_compiler import FFmpegCompiler
        from ..sexp.primitives import get_primitive
        from ..effects.frame_processor import process_video
    except ImportError:
        return None

    primitive_name = FFmpegCompiler().has_python_primitive(effect_name)
    if not primitive_name:
        return None

    primitive_fn = get_primitive(primitive_name)
    if not primitive_fn:
        logger.warning(f"Python primitive '{primitive_name}' not found for effect '{effect_name}'")
        return None

    # Keys that are executor bookkeeping, not effect parameters.
    internal_keys = ("effect", "cid", "hash", "effect_path", "_binding")

    def wrapped_effect(input_path: Path, output_path: Path, config: Dict[str, Any]) -> Path:
        """Run Python primitive effect via frame processor."""
        params = {key: val for key, val in config.items() if key not in internal_keys}
        # Per-frame bindings ride along as dicts carrying "_resolved_values".
        bindings = {
            key: val["_resolved_values"]
            for key, val in config.items()
            if isinstance(val, dict) and val.get("_resolved_values")
        }
        output_path.parent.mkdir(parents=True, exist_ok=True)
        actual_output = output_path.with_suffix(".mp4")

        def process_frame(frame, frame_params, state):
            # Adapt the primitive to the frame processor's (result, state) shape.
            return primitive_fn(frame, **frame_params), state

        process_video(
            input_path=input_path,
            output_path=actual_output,
            process_frame=process_frame,
            params=params,
            bindings=bindings,
        )
        logger.info(f"Processed effect '{effect_name}' via Python primitive '{primitive_name}'")
        return actual_output

    return wrapped_effect
@register_executor("EFFECT")
class EffectExecutor(Executor):
    """
    Apply an effect from the registry or IPFS.

    Config:
        effect: Name of the effect to apply
        cid: IPFS CID for the effect (fetched from IPFS if not cached)
        hash: Legacy alias for cid (backwards compatibility)
        params: Optional parameters for the effect

    Inputs:
        Single input file to transform

    Resolution order in execute(): cached IPFS effect by CID, sexp effect
    from 'effect_path', Python primitive mapping, then the built-in registry.
    """
    def execute(
        self,
        config: Dict[str, Any],
        inputs: List[Path],
        output_path: Path,
    ) -> Path:
        """Resolve the effect function and apply it to the single input.

        Raises:
            ValueError: If 'effect' is missing, the input count is not 1,
                or no resolution strategy can find the effect.
        """
        effect_name = config.get("effect")
        # Support both "cid" (new) and "hash" (legacy)
        effect_cid = config.get("cid") or config.get("hash")
        if not effect_name:
            raise ValueError("EFFECT requires 'effect' config")
        if len(inputs) != 1:
            raise ValueError(f"EFFECT expects 1 input, got {len(inputs)}")
        # 1) Try IPFS effect first if CID provided
        effect_fn: Optional[EffectFn] = None
        if effect_cid:
            effect_fn = _load_cached_effect(effect_cid)
            if effect_fn:
                logger.info(f"Running effect '{effect_name}' (cid={effect_cid[:16]}...)")
        # 2) Try sexp effect from effect_path (.sexp file)
        if effect_fn is None:
            effect_path = config.get("effect_path")
            if effect_path and effect_path.endswith(".sexp"):
                effect_fn = _get_sexp_effect(effect_path)
                if effect_fn:
                    logger.info(f"Running effect '{effect_name}' via sexp definition")
        # 3) Try Python primitive (from FFmpegCompiler.EFFECT_MAPPINGS)
        if effect_fn is None:
            effect_fn = _get_python_primitive_effect(effect_name)
            if effect_fn:
                logger.info(f"Running effect '{effect_name}' via Python primitive")
        # 4) Fall back to built-in effect
        if effect_fn is None:
            effect_fn = get_effect(effect_name)
        if effect_fn is None:
            raise ValueError(f"Unknown effect: {effect_name}")
        # Pass full config (effect can extract what it needs)
        return effect_fn(inputs[0], output_path, config)

    def validate_config(self, config: Dict[str, Any]) -> List[str]:
        """Validate config, mirroring the resolution strategies of execute().

        Fix: validation previously flagged effects as unknown even when
        execute() could still resolve them via a .sexp effect_path or a
        Python primitive mapping; those paths are now checked too.
        """
        errors = []
        if "effect" not in config:
            errors.append("EFFECT requires 'effect' config")
        else:
            # If CID provided, we'll load from IPFS - skip built-in check
            has_cid = config.get("cid") or config.get("hash")
            effect_path = config.get("effect_path")
            has_sexp = bool(effect_path) and str(effect_path).endswith(".sexp")
            if not has_cid and not has_sexp and get_effect(config["effect"]) is None:
                # Last resort, same as execute(): a Python primitive mapping.
                if _get_python_primitive_effect(config["effect"]) is None:
                    errors.append(f"Unknown effect: {config['effect']}")
        return errors

View File

@@ -0,0 +1,50 @@
# artdag/nodes/encoding.py
"""
Web-optimized video encoding settings.

Provides common FFmpeg arguments for producing videos that:
- Stream efficiently (faststart)
- Play on all browsers (H.264 High profile)
- Support seeking (regular keyframes)
"""
from typing import List

# Standard web-optimized video encoding arguments
WEB_VIDEO_ARGS: List[str] = [
    "-c:v", "libx264",
    "-preset", "fast",
    "-crf", "18",
    "-profile:v", "high",
    "-level", "4.1",
    "-pix_fmt", "yuv420p",  # Ensure broad compatibility
    "-movflags", "+faststart",  # Enable streaming before full download
    "-g", "48",  # Keyframe every ~2 seconds at 24fps (for seeking)
]

# Standard audio encoding arguments
WEB_AUDIO_ARGS: List[str] = [
    "-c:a", "aac",
    "-b:a", "192k",
]


def get_web_encoding_args() -> List[str]:
    """Get FFmpeg args for web-optimized video+audio encoding."""
    return [*WEB_VIDEO_ARGS, *WEB_AUDIO_ARGS]


def get_web_video_args() -> List[str]:
    """Get FFmpeg args for web-optimized video encoding only."""
    return list(WEB_VIDEO_ARGS)


def get_web_audio_args() -> List[str]:
    """Get FFmpeg args for web-optimized audio encoding only."""
    return list(WEB_AUDIO_ARGS)


# For shell commands (string format)
WEB_VIDEO_ARGS_STR = " ".join(WEB_VIDEO_ARGS)
WEB_AUDIO_ARGS_STR = " ".join(WEB_AUDIO_ARGS)
WEB_ENCODING_ARGS_STR = f"{WEB_VIDEO_ARGS_STR} {WEB_AUDIO_ARGS_STR}"

View File

@@ -0,0 +1,62 @@
# primitive/nodes/source.py
"""
Source executors: Load media from paths.
Primitives: SOURCE
"""
import logging
import os
import shutil
from pathlib import Path
from typing import Any, Dict, List
from ..dag import NodeType
from ..executor import Executor, register_executor
logger = logging.getLogger(__name__)
@register_executor(NodeType.SOURCE)
class SourceExecutor(Executor):
    """
    Load source media from a path.

    Config:
        path: Path to source file

    Creates a symlink to the source file for zero-copy loading.
    """
    def execute(
        self,
        config: Dict[str, Any],
        inputs: List[Path],
        output_path: Path,
    ) -> Path:
        """Symlink the configured source file into the output location."""
        src = Path(config["path"])
        if not src.exists():
            raise FileNotFoundError(f"Source file not found: {src}")
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Keep the source's extension so downstream tools can sniff the type.
        link_path = output_path.with_suffix(src.suffix)
        # Remove any stale outputs (both with and without the suffix swap).
        for stale in (output_path, link_path):
            if stale.exists() or stale.is_symlink():
                stale.unlink()
        # Zero-copy: point at the resolved source instead of copying bytes.
        os.symlink(src.resolve(), link_path)
        logger.debug(f"SOURCE: {src.name} -> {link_path}")
        return link_path

    def validate_config(self, config: Dict[str, Any]) -> List[str]:
        """Ensure the mandatory 'path' key is present."""
        if "path" in config:
            return []
        return ["SOURCE requires 'path' config"]

View File

@@ -0,0 +1,224 @@
# primitive/nodes/transform.py
"""
Transform executors: Modify single media inputs.
Primitives: SEGMENT, RESIZE, TRANSFORM
"""
import logging
import subprocess
from pathlib import Path
from typing import Any, Dict, List
from ..dag import NodeType
from ..executor import Executor, register_executor
from .encoding import get_web_encoding_args, get_web_video_args
logger = logging.getLogger(__name__)
@register_executor(NodeType.SEGMENT)
class SegmentExecutor(Executor):
    """
    Extract a time segment from media.

    Config:
        offset: Start time in seconds (default: 0)
        duration: Duration in seconds (optional, default: to end)
        precise: Use frame-accurate seeking (default: True)
    """
    def execute(
        self,
        config: Dict[str, Any],
        inputs: List[Path],
        output_path: Path,
    ) -> Path:
        """Run ffmpeg to cut the requested time range from the input."""
        if len(inputs) != 1:
            raise ValueError("SEGMENT requires exactly one input")
        media = inputs[0]
        offset = config.get("offset", 0)
        duration = config.get("duration")
        precise = config.get("precise", True)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Optional seek/length argument groups shared by both modes.
        seek = ["-ss", str(offset)] if offset > 0 else []
        length = ["-t", str(duration)] if duration else []
        if precise:
            # Frame-accurate: seek after decode (slower but precise), re-encode.
            cmd = ["ffmpeg", "-y", "-i", str(media), *seek, *length,
                   *get_web_encoding_args(), str(output_path)]
        else:
            # Fast: input-seek at keyframes with stream copy (may be slightly off).
            cmd = ["ffmpeg", "-y", *seek, "-i", str(media), *length,
                   "-c", "copy", str(output_path)]
        logger.debug(f"SEGMENT: offset={offset}, duration={duration}, precise={precise}")
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Segment failed: {result.stderr}")
        return output_path
@register_executor(NodeType.RESIZE)
class ResizeExecutor(Executor):
    """
    Resize media to target dimensions.

    Config:
        width: Target width (required)
        height: Target height (required)
        mode: "fit" (letterbox), "fill" (crop), "stretch", "pad"
        background: Background color for pad mode (default: black)
    """
    # Supported resize modes, shared by execute() and validate_config().
    VALID_MODES = ("fit", "fill", "stretch", "pad")

    def execute(
        self,
        config: Dict[str, Any],
        inputs: List[Path],
        output_path: Path,
    ) -> Path:
        """Run ffmpeg to scale the input to width x height using `mode`.

        Raises:
            ValueError: On wrong input count or unknown mode.
            RuntimeError: If ffmpeg exits non-zero.
        """
        if len(inputs) != 1:
            raise ValueError("RESIZE requires exactly one input")
        input_path = inputs[0]
        width = config["width"]
        height = config["height"]
        mode = config.get("mode", "fit")
        background = config.get("background", "black")
        output_path.parent.mkdir(parents=True, exist_ok=True)
        if mode == "fit":
            # Scale to fit, add letterboxing
            vf = f"scale={width}:{height}:force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color={background}"
        elif mode == "fill":
            # Scale to fill, crop excess
            vf = f"scale={width}:{height}:force_original_aspect_ratio=increase,crop={width}:{height}"
        elif mode == "stretch":
            # Stretch to exact size
            vf = f"scale={width}:{height}"
        elif mode == "pad":
            # Scale down only if larger, then pad
            vf = f"scale='min({width},iw)':'min({height},ih)':force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color={background}"
        else:
            raise ValueError(f"Unknown resize mode: {mode}")
        cmd = [
            "ffmpeg", "-y",
            "-i", str(input_path),
            "-vf", vf,
            *get_web_video_args(),
            "-c:a", "copy",  # audio passes through untouched
            str(output_path)
        ]
        logger.debug(f"RESIZE: {width}x{height} ({mode}) (web-optimized)")
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Resize failed: {result.stderr}")
        return output_path

    def validate_config(self, config: Dict[str, Any]) -> List[str]:
        """Report missing/invalid config without running ffmpeg.

        Added for consistency with the other executors' validate_config:
        previously execute() would fail with a bare KeyError when
        width/height were absent, and only at runtime on a bad mode.
        """
        errors = []
        for key in ("width", "height"):
            if key not in config:
                errors.append(f"RESIZE requires '{key}' config")
        mode = config.get("mode", "fit")
        if mode not in self.VALID_MODES:
            errors.append(f"Unknown resize mode: {mode}")
        return errors
@register_executor(NodeType.TRANSFORM)
class TransformExecutor(Executor):
    """
    Apply visual effects to media.

    Config:
        effects: Dict of effect -> value
            saturation: 0.0-2.0 (1.0 = normal)
            contrast: 0.0-2.0 (1.0 = normal)
            brightness: -1.0 to 1.0 (0.0 = normal)
            gamma: 0.1-10.0 (1.0 = normal)
            hue: degrees shift
            blur: blur radius
            sharpen: sharpen amount
            speed: playback speed multiplier (must be > 0)
    """

    @staticmethod
    def _atempo_chain(speed: float) -> List[str]:
        """Build atempo filter(s) for an arbitrary positive speed factor.

        ffmpeg's atempo filter only accepts tempo values in [0.5, 2.0];
        factors outside that range must be achieved by chaining instances.
        The previous implementation emitted a single atempo, which made
        ffmpeg reject any speed below 0.5 or above 2.0.
        """
        filters: List[str] = []
        remaining = float(speed)
        while remaining > 2.0:
            filters.append("atempo=2.0")
            remaining /= 2.0
        while remaining < 0.5:
            filters.append("atempo=0.5")
            remaining /= 0.5
        filters.append(f"atempo={remaining}")
        return filters

    def execute(
        self,
        config: Dict[str, Any],
        inputs: List[Path],
        output_path: Path,
    ) -> Path:
        """Apply the configured effects via an ffmpeg filter chain.

        Raises:
            ValueError: On wrong input count or non-positive speed.
            RuntimeError: If ffmpeg exits non-zero.
        """
        if len(inputs) != 1:
            raise ValueError("TRANSFORM requires exactly one input")
        input_path = inputs[0]
        effects = config.get("effects", {})
        if not effects:
            # No effects - just copy
            import shutil
            output_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(input_path, output_path)
            return output_path
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Build filter chain
        vf_parts = []
        af_parts = []
        # Color adjustments via eq filter
        eq_parts = []
        if "saturation" in effects:
            eq_parts.append(f"saturation={effects['saturation']}")
        if "contrast" in effects:
            eq_parts.append(f"contrast={effects['contrast']}")
        if "brightness" in effects:
            eq_parts.append(f"brightness={effects['brightness']}")
        if "gamma" in effects:
            eq_parts.append(f"gamma={effects['gamma']}")
        if eq_parts:
            vf_parts.append(f"eq={':'.join(eq_parts)}")
        # Hue adjustment
        if "hue" in effects:
            vf_parts.append(f"hue=h={effects['hue']}")
        # Blur
        if "blur" in effects:
            vf_parts.append(f"boxblur={effects['blur']}")
        # Sharpen
        if "sharpen" in effects:
            vf_parts.append(f"unsharp=5:5:{effects['sharpen']}:5:5:0")
        # Speed change: rescale video PTS and chain audio atempo filters
        if "speed" in effects:
            speed = float(effects["speed"])
            if speed <= 0:
                # Old code divided by zero / produced nonsense for speed <= 0.
                raise ValueError(f"TRANSFORM speed must be positive, got {speed}")
            vf_parts.append(f"setpts={1/speed}*PTS")
            af_parts.extend(self._atempo_chain(speed))
        cmd = ["ffmpeg", "-y", "-i", str(input_path)]
        if vf_parts:
            cmd.extend(["-vf", ",".join(vf_parts)])
        if af_parts:
            cmd.extend(["-af", ",".join(af_parts)])
        cmd.extend([*get_web_encoding_args(), str(output_path)])
        logger.debug(f"TRANSFORM: {list(effects.keys())} (web-optimized)")
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Transform failed: {result.stderr}")
        return output_path

View File

@@ -0,0 +1,29 @@
# artdag/planning - Execution plan generation
#
# Provides the Planning phase of the 3-phase execution model:
# 1. ANALYZE - Extract features from inputs
# 2. PLAN - Generate execution plan with cache IDs
# 3. EXECUTE - Run steps with caching
from .schema import (
ExecutionStep,
ExecutionPlan,
StepStatus,
StepOutput,
StepInput,
PlanInput,
)
from .planner import RecipePlanner, Recipe
from .tree_reduction import TreeReducer
__all__ = [
"ExecutionStep",
"ExecutionPlan",
"StepStatus",
"StepOutput",
"StepInput",
"PlanInput",
"RecipePlanner",
"Recipe",
"TreeReducer",
]

View File

@@ -0,0 +1,756 @@
# artdag/planning/planner.py
"""
Recipe planner - converts recipes into execution plans.
The planner is the second phase of the 3-phase execution model.
It takes a recipe and analysis results and generates a complete
execution plan with pre-computed cache IDs.
"""
import hashlib
import json
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import yaml
from .schema import ExecutionPlan, ExecutionStep, StepOutput, StepInput, PlanInput
from .tree_reduction import TreeReducer, reduce_sequence
from ..analysis import AnalysisResult
def _infer_media_type(node_type: str, config: Dict[str, Any] = None) -> str:
"""Infer media type from node type and config."""
config = config or {}
# Audio operations
if node_type in ("AUDIO", "MIX_AUDIO", "EXTRACT_AUDIO"):
return "audio/wav"
if "audio" in node_type.lower():
return "audio/wav"
# Image operations
if node_type in ("FRAME", "THUMBNAIL", "IMAGE"):
return "image/png"
# Default to video
return "video/mp4"
# Module-level logger for the planner (named after this module).
logger = logging.getLogger(__name__)
def _stable_hash(data: Any, algorithm: str = "sha3_256") -> str:
"""Create stable hash from arbitrary data."""
json_str = json.dumps(data, sort_keys=True, separators=(",", ":"))
hasher = hashlib.new(algorithm)
hasher.update(json_str.encode())
return hasher.hexdigest()
@dataclass
class RecipeNode:
    """A node in the recipe DAG.

    Mirrors one entry of the recipe's ``dag.nodes`` YAML list after
    normalization by Recipe.from_yaml.
    """
    # Node identifier, unique within the recipe DAG.
    id: str
    # Node type name, e.g. "SOURCE", "SEGMENT", "SEQUENCE", "MAP".
    type: str
    # Node-specific configuration mapping (may be empty).
    config: Dict[str, Any]
    # IDs of upstream nodes, flattened to a list by Recipe.from_yaml.
    inputs: List[str]
@dataclass
class Recipe:
    """Parsed recipe structure."""
    name: str
    version: str
    description: str
    nodes: List[RecipeNode]
    output: str
    registry: Dict[str, Any]
    owner: str
    raw_yaml: str

    @property
    def recipe_hash(self) -> str:
        """Compute hash of recipe content."""
        return _stable_hash({"yaml": self.raw_yaml})

    @staticmethod
    def _normalize_inputs(raw) -> List[str]:
        """Coerce a node's inputs (str, list, or dict of refs) to a flat list."""
        if isinstance(raw, dict):
            flat: List[str] = []
            for value in raw.values():
                if isinstance(value, str):
                    flat.append(value)
                elif isinstance(value, list):
                    flat.extend(value)
            return flat
        if isinstance(raw, str):
            return [raw]
        return raw

    @classmethod
    def from_yaml(cls, yaml_content: str) -> "Recipe":
        """Parse recipe from YAML string."""
        data = yaml.safe_load(yaml_content)
        dag = data.get("dag", {})
        nodes = [
            RecipeNode(
                id=node_data["id"],
                type=node_data["type"],
                config=node_data.get("config", {}),
                inputs=cls._normalize_inputs(node_data.get("inputs", [])),
            )
            for node_data in dag.get("nodes", [])
        ]
        return cls(
            name=data.get("name", "unnamed"),
            version=data.get("version", "1.0"),
            description=data.get("description", ""),
            nodes=nodes,
            output=dag.get("output", ""),
            registry=data.get("registry", {}),
            owner=data.get("owner", ""),
            raw_yaml=yaml_content,
        )

    @classmethod
    def from_file(cls, path: Path) -> "Recipe":
        """Load recipe from YAML file."""
        with open(path, "r") as f:
            return cls.from_yaml(f.read())
class RecipePlanner:
    """
    Generates execution plans from recipes.

    The planner:
    1. Parses the recipe
    2. Resolves fixed inputs from registry
    3. Maps variable inputs to provided hashes
    4. Expands MAP/iteration nodes
    5. Applies tree reduction for SEQUENCE nodes
    6. Computes cache IDs for all steps
    """
    def __init__(self, use_tree_reduction: bool = True):
        """
        Initialize the planner.

        Args:
            use_tree_reduction: Whether to use tree reduction for SEQUENCE
        """
        self.use_tree_reduction = use_tree_reduction
    def plan(
        self,
        recipe: Recipe,
        input_hashes: Dict[str, str],
        analysis: Optional[Dict[str, AnalysisResult]] = None,
        seed: Optional[int] = None,
    ) -> ExecutionPlan:
        """
        Generate an execution plan from a recipe.

        Args:
            recipe: The parsed recipe
            input_hashes: Mapping from input name to content hash
            analysis: Analysis results for inputs (keyed by hash)
            seed: Random seed for deterministic planning

        Returns:
            ExecutionPlan with pre-computed cache IDs

        Raises:
            ValueError: If the recipe's declared output node produced no step.
        """
        logger.info(f"Planning recipe: {recipe.name}")
        # Build node lookup
        nodes_by_id = {n.id: n for n in recipe.nodes}
        # Topologically sort nodes
        sorted_ids = self._topological_sort(recipe.nodes)
        # Resolve registry references
        registry_hashes = self._resolve_registry(recipe.registry)
        # Build PlanInput objects from input_hashes
        plan_inputs = []
        for name, cid in input_hashes.items():
            # Try to find matching SOURCE node for media type
            media_type = "application/octet-stream"
            for node in recipe.nodes:
                if node.id == name and node.type == "SOURCE":
                    media_type = _infer_media_type("SOURCE", node.config)
                    break
            plan_inputs.append(PlanInput(
                name=name,
                cache_id=cid,
                cid=cid,
                media_type=media_type,
            ))
        # Generate steps (nodes are processed in topological order so every
        # node's inputs are already present in step_id_map when reached)
        steps = []
        step_id_map = {}  # Maps recipe node ID to step ID(s)
        step_name_map = {}  # Maps recipe node ID to human-readable name
        analysis_cache_ids = {}
        for node_id in sorted_ids:
            node = nodes_by_id[node_id]
            logger.debug(f"Processing node: {node.id} ({node.type})")
            new_steps, output_step_id = self._process_node(
                node=node,
                step_id_map=step_id_map,
                step_name_map=step_name_map,
                input_hashes=input_hashes,
                registry_hashes=registry_hashes,
                analysis=analysis or {},
                recipe_name=recipe.name,
            )
            steps.extend(new_steps)
            step_id_map[node_id] = output_step_id
            # Track human-readable name for this node
            if new_steps:
                step_name_map[node_id] = new_steps[-1].name
        # Find output step
        output_step = step_id_map.get(recipe.output)
        if not output_step:
            raise ValueError(f"Output node '{recipe.output}' not found")
        # Determine output name
        output_name = f"{recipe.name}.output"
        output_step_obj = next((s for s in steps if s.step_id == output_step), None)
        if output_step_obj and output_step_obj.outputs:
            output_name = output_step_obj.outputs[0].name
        # Build analysis cache IDs
        if analysis:
            analysis_cache_ids = {
                h: a.cache_id for h, a in analysis.items()
                if a.cache_id
            }
        # Create plan
        plan = ExecutionPlan(
            plan_id=None,  # Computed in __post_init__
            name=f"{recipe.name}_plan",
            recipe_id=recipe.name,
            recipe_name=recipe.name,
            recipe_hash=recipe.recipe_hash,
            seed=seed,
            inputs=plan_inputs,
            steps=steps,
            output_step=output_step,
            output_name=output_name,
            analysis_cache_ids=analysis_cache_ids,
            input_hashes=input_hashes,
            metadata={
                "recipe_version": recipe.version,
                "recipe_description": recipe.description,
                "owner": recipe.owner,
            },
        )
        # Compute all cache IDs and then generate outputs
        plan.compute_all_cache_ids()
        plan.compute_levels()
        # Now add outputs to each step (needs cache_id to be computed first)
        self._add_step_outputs(plan, recipe.name)
        # Recompute plan_id after outputs are added
        plan.plan_id = plan._compute_plan_id()
        logger.info(f"Generated plan with {len(steps)} steps")
        return plan
    def _add_step_outputs(self, plan: ExecutionPlan, recipe_name: str) -> None:
        """Add output definitions to each step after cache_ids are computed."""
        for step in plan.steps:
            if step.outputs:
                continue  # Already has outputs
            # Generate output name from step name
            base_name = step.name or step.step_id
            output_name = f"{recipe_name}.{base_name}.out"
            media_type = _infer_media_type(step.node_type, step.config)
            step.add_output(
                name=output_name,
                media_type=media_type,
                index=0,
                metadata={},
            )
    def plan_from_yaml(
        self,
        yaml_content: str,
        input_hashes: Dict[str, str],
        analysis: Optional[Dict[str, AnalysisResult]] = None,
    ) -> ExecutionPlan:
        """
        Generate plan from YAML string.

        Args:
            yaml_content: Recipe YAML content
            input_hashes: Mapping from input name to content hash
            analysis: Analysis results

        Returns:
            ExecutionPlan
        """
        recipe = Recipe.from_yaml(yaml_content)
        return self.plan(recipe, input_hashes, analysis)
    def plan_from_file(
        self,
        recipe_path: Path,
        input_hashes: Dict[str, str],
        analysis: Optional[Dict[str, AnalysisResult]] = None,
    ) -> ExecutionPlan:
        """
        Generate plan from recipe file.

        Args:
            recipe_path: Path to recipe YAML file
            input_hashes: Mapping from input name to content hash
            analysis: Analysis results

        Returns:
            ExecutionPlan
        """
        recipe = Recipe.from_file(recipe_path)
        return self.plan(recipe, input_hashes, analysis)
    def _topological_sort(self, nodes: List[RecipeNode]) -> List[str]:
        """Topologically sort recipe nodes (depth-first post-order).

        NOTE(review): there is no cycle detection here - a cyclic recipe
        would terminate (visited-set guard) but produce an order that
        silently breaks the dependency invariant; confirm recipes are
        validated as acyclic upstream.
        """
        nodes_by_id = {n.id: n for n in nodes}
        visited = set()
        order = []
        def visit(node_id: str):
            if node_id in visited:
                return
            if node_id not in nodes_by_id:
                return  # External input
            visited.add(node_id)
            node = nodes_by_id[node_id]
            for input_id in node.inputs:
                visit(input_id)
            order.append(node_id)
        for node in nodes:
            visit(node.id)
        return order
    def _resolve_registry(self, registry: Dict[str, Any]) -> Dict[str, str]:
        """
        Resolve registry references to content hashes.

        Args:
            registry: Registry section from recipe

        Returns:
            Mapping from name to content hash (effects are prefixed
            with "effect:" to avoid clashing with asset names)
        """
        hashes = {}
        # Assets
        for name, asset_data in registry.get("assets", {}).items():
            if isinstance(asset_data, dict) and "hash" in asset_data:
                hashes[name] = asset_data["hash"]
            elif isinstance(asset_data, str):
                hashes[name] = asset_data
        # Effects
        for name, effect_data in registry.get("effects", {}).items():
            if isinstance(effect_data, dict) and "hash" in effect_data:
                hashes[f"effect:{name}"] = effect_data["hash"]
            elif isinstance(effect_data, str):
                hashes[f"effect:{name}"] = effect_data
        return hashes
    def _process_node(
        self,
        node: RecipeNode,
        step_id_map: Dict[str, str],
        step_name_map: Dict[str, str],
        input_hashes: Dict[str, str],
        registry_hashes: Dict[str, str],
        analysis: Dict[str, AnalysisResult],
        recipe_name: str = "",
    ) -> Tuple[List[ExecutionStep], str]:
        """
        Process a recipe node into execution steps.

        Dispatches on node.type to the specialized _process_* handlers;
        any type without a dedicated handler falls through to
        _process_standard.

        Args:
            node: Recipe node to process
            step_id_map: Mapping from processed node IDs to step IDs
            step_name_map: Mapping from node IDs to human-readable names
            input_hashes: User-provided input hashes
            registry_hashes: Registry-resolved hashes
            analysis: Analysis results
            recipe_name: Name of the recipe (for generating readable names)

        Returns:
            Tuple of (new steps, output step ID)
        """
        # SOURCE nodes
        if node.type == "SOURCE":
            return self._process_source(node, input_hashes, registry_hashes, recipe_name)
        # SOURCE_LIST nodes
        if node.type == "SOURCE_LIST":
            return self._process_source_list(node, input_hashes, recipe_name)
        # ANALYZE nodes
        if node.type == "ANALYZE":
            return self._process_analyze(node, step_id_map, analysis, recipe_name)
        # MAP nodes
        if node.type == "MAP":
            return self._process_map(node, step_id_map, input_hashes, analysis, recipe_name)
        # SEQUENCE nodes (may use tree reduction)
        if node.type == "SEQUENCE":
            return self._process_sequence(node, step_id_map, recipe_name)
        # SEGMENT_AT nodes
        if node.type == "SEGMENT_AT":
            return self._process_segment_at(node, step_id_map, analysis, recipe_name)
        # Standard nodes (SEGMENT, RESIZE, TRANSFORM, LAYER, MUX, BLEND, etc.)
        return self._process_standard(node, step_id_map, recipe_name)
    def _process_source(
        self,
        node: RecipeNode,
        input_hashes: Dict[str, str],
        registry_hashes: Dict[str, str],
        recipe_name: str = "",
    ) -> Tuple[List[ExecutionStep], str]:
        """Process SOURCE node.

        Resolves the content hash either from user-provided input_hashes
        (variable input) or from the recipe registry (fixed asset).

        Raises:
            ValueError: If neither an input hash nor a registry asset exists.
        """
        config = dict(node.config)
        # Variable input?
        if config.get("input"):
            # Look up in user-provided inputs
            if node.id not in input_hashes:
                raise ValueError(f"Missing input for SOURCE node '{node.id}'")
            cid = input_hashes[node.id]
        # Fixed asset from registry?
        elif config.get("asset"):
            asset_name = config["asset"]
            if asset_name not in registry_hashes:
                raise ValueError(f"Asset '{asset_name}' not found in registry")
            cid = registry_hashes[asset_name]
        else:
            raise ValueError(f"SOURCE node '{node.id}' has no input or asset")
        # Human-readable name
        display_name = config.get("name", node.id)
        step_name = f"{recipe_name}.inputs.{display_name}" if recipe_name else display_name
        step = ExecutionStep(
            step_id=node.id,
            node_type="SOURCE",
            config={"input_ref": node.id, "cid": cid},
            input_steps=[],
            cache_id=cid,  # SOURCE cache_id is just the content hash
            name=step_name,
        )
        return [step], step.step_id
    def _process_source_list(
        self,
        node: RecipeNode,
        input_hashes: Dict[str, str],
        recipe_name: str = "",
    ) -> Tuple[List[ExecutionStep], str]:
        """
        Process SOURCE_LIST node.

        Creates individual SOURCE steps for each item in the list, plus a
        synthetic "_LIST" marker step that aggregates them as its inputs.

        Raises:
            ValueError: If no input hash was provided for the node.
        """
        # Look for list input
        if node.id not in input_hashes:
            raise ValueError(f"Missing input for SOURCE_LIST node '{node.id}'")
        input_value = input_hashes[node.id]
        # Parse as comma-separated list if string
        if isinstance(input_value, str):
            items = [h.strip() for h in input_value.split(",")]
        else:
            items = list(input_value)
        display_name = node.config.get("name", node.id)
        base_name = f"{recipe_name}.{display_name}" if recipe_name else display_name
        steps = []
        for i, cid in enumerate(items):
            step = ExecutionStep(
                step_id=f"{node.id}_{i}",
                node_type="SOURCE",
                config={"input_ref": f"{node.id}[{i}]", "cid": cid},
                input_steps=[],
                cache_id=cid,
                name=f"{base_name}[{i}]",
            )
            steps.append(step)
        # Return list marker as output
        list_step = ExecutionStep(
            step_id=node.id,
            node_type="_LIST",
            config={"items": [s.step_id for s in steps]},
            input_steps=[s.step_id for s in steps],
            name=f"{base_name}.list",
        )
        steps.append(list_step)
        return steps, list_step.step_id
    def _process_analyze(
        self,
        node: RecipeNode,
        step_id_map: Dict[str, str],
        analysis: Dict[str, AnalysisResult],
        recipe_name: str = "",
    ) -> Tuple[List[ExecutionStep], str]:
        """
        Process ANALYZE node.

        ANALYZE nodes reference pre-computed analysis results.

        Raises:
            ValueError: If the node has no resolvable input.
        """
        input_step = step_id_map.get(node.inputs[0]) if node.inputs else None
        if not input_step:
            raise ValueError(f"ANALYZE node '{node.id}' has no input")
        feature = node.config.get("feature", "all")
        step_name = f"{recipe_name}.analysis.{feature}" if recipe_name else f"analysis.{feature}"
        step = ExecutionStep(
            step_id=node.id,
            node_type="ANALYZE",
            config={
                "feature": feature,
                **node.config,
            },
            input_steps=[input_step],
            name=step_name,
        )
        return [step], step.step_id
    def _process_map(
        self,
        node: RecipeNode,
        step_id_map: Dict[str, str],
        input_hashes: Dict[str, str],
        analysis: Dict[str, AnalysisResult],
        recipe_name: str = "",
    ) -> Tuple[List[ExecutionStep], str]:
        """
        Process MAP node - expand iteration over list.

        MAP applies an operation to each item in a list and emits a
        synthetic "_LIST" step that aggregates the per-item results.

        Raises:
            ValueError: If no items input can be resolved.
        """
        operation = node.config.get("operation", "TRANSFORM")
        base_name = f"{recipe_name}.{node.id}" if recipe_name else node.id
        # Get items input (explicit config key wins over positional inputs)
        items_ref = node.config.get("items") or (
            node.inputs[0] if isinstance(node.inputs, list) else
            node.inputs.get("items") if isinstance(node.inputs, dict) else None
        )
        if not items_ref:
            raise ValueError(f"MAP node '{node.id}' has no items input")
        # Resolve items to list of step IDs
        if items_ref in step_id_map:
            # Reference to SOURCE_LIST output
            items_step = step_id_map[items_ref]
            # TODO: expand list items
            logger.warning(f"MAP node '{node.id}' references list step, expansion TBD")
            item_steps = [items_step]
        else:
            item_steps = [items_ref]
        # Generate step for each item
        steps = []
        output_steps = []
        for i, item_step in enumerate(item_steps):
            step_id = f"{node.id}_{i}"
            if operation == "RANDOM_SLICE":
                step = ExecutionStep(
                    step_id=step_id,
                    node_type="SEGMENT",
                    config={
                        "random": True,
                        "seed_from": node.config.get("seed_from"),
                        "index": i,
                    },
                    input_steps=[item_step],
                    name=f"{base_name}.slice[{i}]",
                )
            elif operation == "TRANSFORM":
                step = ExecutionStep(
                    step_id=step_id,
                    node_type="TRANSFORM",
                    config=node.config.get("effects", {}),
                    input_steps=[item_step],
                    name=f"{base_name}.transform[{i}]",
                )
            elif operation == "ANALYZE":
                step = ExecutionStep(
                    step_id=step_id,
                    node_type="ANALYZE",
                    config={"feature": node.config.get("feature", "all")},
                    input_steps=[item_step],
                    name=f"{base_name}.analyze[{i}]",
                )
            else:
                # Any other operation passes the MAP config through verbatim.
                step = ExecutionStep(
                    step_id=step_id,
                    node_type=operation,
                    config=node.config,
                    input_steps=[item_step],
                    name=f"{base_name}.{operation.lower()}[{i}]",
                )
            steps.append(step)
            output_steps.append(step_id)
        # Create list output
        list_step = ExecutionStep(
            step_id=node.id,
            node_type="_LIST",
            config={"items": output_steps},
            input_steps=output_steps,
            name=f"{base_name}.results",
        )
        steps.append(list_step)
        return steps, list_step.step_id
    def _process_sequence(
        self,
        node: RecipeNode,
        step_id_map: Dict[str, str],
        recipe_name: str = "",
    ) -> Tuple[List[ExecutionStep], str]:
        """
        Process SEQUENCE node.

        Uses tree reduction for parallel composition if enabled; a single
        input collapses to a pass-through (no step emitted).

        Raises:
            ValueError: If the node has no inputs at all.
        """
        base_name = f"{recipe_name}.{node.id}" if recipe_name else node.id
        # Resolve input steps (fall back to the raw ID for external refs)
        input_steps = []
        for input_id in node.inputs:
            if input_id in step_id_map:
                input_steps.append(step_id_map[input_id])
            else:
                input_steps.append(input_id)
        if len(input_steps) == 0:
            raise ValueError(f"SEQUENCE node '{node.id}' has no inputs")
        if len(input_steps) == 1:
            # Single input, no sequence needed
            return [], input_steps[0]
        transition_config = node.config.get("transition", {"type": "cut"})
        config = {"transition": transition_config}
        if self.use_tree_reduction and len(input_steps) > 2:
            # Use tree reduction
            reduction_steps, output_id = reduce_sequence(
                input_steps,
                transition_config=config,
                id_prefix=node.id,
            )
            steps = []
            for i, (step_id, inputs, step_config) in enumerate(reduction_steps):
                step = ExecutionStep(
                    step_id=step_id,
                    node_type="SEQUENCE",
                    config=step_config,
                    input_steps=inputs,
                    name=f"{base_name}.reduce[{i}]",
                )
                steps.append(step)
            return steps, output_id
        else:
            # Direct sequence
            step = ExecutionStep(
                step_id=node.id,
                node_type="SEQUENCE",
                config=config,
                input_steps=input_steps,
                name=f"{base_name}.concat",
            )
            return [step], step.step_id
    def _process_segment_at(
        self,
        node: RecipeNode,
        step_id_map: Dict[str, str],
        analysis: Dict[str, AnalysisResult],
        recipe_name: str = "",
    ) -> Tuple[List[ExecutionStep], str]:
        """
        Process SEGMENT_AT node - cut at specific times.

        Creates SEGMENT steps for each time range.
        """
        base_name = f"{recipe_name}.{node.id}" if recipe_name else node.id
        # NOTE(review): times_from/distribute are read but not yet used -
        # they belong to the analysis-resolution TODO below.
        times_from = node.config.get("times_from")
        distribute = node.config.get("distribute", "round_robin")
        # TODO: Resolve times from analysis
        # For now, create a placeholder
        step = ExecutionStep(
            step_id=node.id,
            node_type="SEGMENT_AT",
            config=node.config,
            input_steps=[step_id_map.get(i, i) for i in node.inputs],
            name=f"{base_name}.segment",
        )
        return [step], step.step_id
    def _process_standard(
        self,
        node: RecipeNode,
        step_id_map: Dict[str, str],
        recipe_name: str = "",
    ) -> Tuple[List[ExecutionStep], str]:
        """Process standard transformation/composition node.

        One-to-one mapping: the recipe node becomes a single step whose
        config is passed through unchanged.
        """
        base_name = f"{recipe_name}.{node.id}" if recipe_name else node.id
        input_steps = [step_id_map.get(i, i) for i in node.inputs]
        step = ExecutionStep(
            step_id=node.id,
            node_type=node.type,
            config=node.config,
            input_steps=input_steps,
            name=f"{base_name}.{node.type.lower()}",
        )
        return [step], step.step_id

View File

@@ -0,0 +1,594 @@
# artdag/planning/schema.py
"""
Data structures for execution plans.
An ExecutionPlan contains all steps needed to execute a recipe,
with pre-computed cache IDs for each step.
"""
import hashlib
import json
import os
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, List, Optional
# Cluster key for trust domains
# Systems with the same key produce the same cache_ids and can share work
# Systems with different keys have isolated cache namespaces
CLUSTER_KEY: Optional[str] = os.environ.get("ARTDAG_CLUSTER_KEY")


def set_cluster_key(key: Optional[str]) -> None:
    """Override the cluster key at runtime (None clears it)."""
    global CLUSTER_KEY
    CLUSTER_KEY = key


def get_cluster_key() -> Optional[str]:
    """Return the cluster key currently in effect."""
    return CLUSTER_KEY
def _stable_hash(data: Any, algorithm: str = "sha3_256") -> str:
    """
    Create stable hash from arbitrary data.

    If ARTDAG_CLUSTER_KEY is set, it's mixed into the hash to create
    isolated trust domains. Systems with the same key can share work;
    systems with different keys have separate cache namespaces.
    """
    # Wrap the payload with the cluster key (when set) so digests from
    # different trust domains never collide.
    payload = {"_cluster_key": CLUSTER_KEY, "_data": data} if CLUSTER_KEY else data
    canonical = json.dumps(payload, sort_keys=True, separators=(",", ":"))
    digest = hashlib.new(algorithm)
    digest.update(canonical.encode())
    return digest.hexdigest()
class StepStatus(Enum):
    """Status of an execution step.

    NOTE(review): lifecycle ordering inferred from the member names -
    confirm against the executor/scheduler that consumes these values.
    """
    PENDING = "pending"      # not yet picked up
    CLAIMED = "claimed"      # reserved by a worker, not yet running
    RUNNING = "running"      # currently executing
    COMPLETED = "completed"  # finished successfully
    CACHED = "cached"        # output already present; execution skipped
    FAILED = "failed"        # execution raised/exited with error
    SKIPPED = "skipped"      # deliberately not executed
@dataclass
class StepOutput:
    """One artifact produced by an execution step.

    Multi-output nodes (e.g. split_on_beats) emit several of these; each is
    addressable by a human-readable name and a content-addressed cache_id.

    Attributes:
        name: Human-readable name (e.g. "beats.split.segment[0]").
        cache_id: Content-addressed hash used for cache storage/lookup.
        media_type: MIME type of the artifact.
        index: Position for multi-output nodes.
        metadata: Free-form extra data (time ranges, etc.).
    """

    name: str
    cache_id: str
    media_type: str = "application/octet-stream"
    index: int = 0
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict."""
        return dict(
            name=self.name,
            cache_id=self.cache_id,
            media_type=self.media_type,
            index=self.index,
            metadata=self.metadata,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "StepOutput":
        """Rebuild from a ``to_dict`` payload; missing optional keys get defaults."""
        get = data.get
        return cls(
            name=data["name"],
            cache_id=data["cache_id"],
            media_type=get("media_type", "application/octet-stream"),
            index=get("index", 0),
            metadata=get("metadata", {}),
        )
@dataclass
class StepInput:
    """Reference to an input slot of a step.

    Inputs name an output of another step (by its output name); the
    concrete cache_id is filled in during planning.

    Attributes:
        name: Input slot name (e.g. "video", "audio", "segments").
        source: Source output name (e.g. "beats.split.segment[0]").
        cache_id: Resolved cache_id of the source, if known.
    """

    name: str
    source: str
    cache_id: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict."""
        return {attr: getattr(self, attr) for attr in ("name", "source", "cache_id")}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "StepInput":
        """Rebuild from a ``to_dict`` payload (cache_id may be absent)."""
        return cls(data["name"], data["source"], data.get("cache_id"))
@dataclass
class ExecutionStep:
    """
    A single step in the execution plan.

    Each step has a pre-computed cache_id that uniquely identifies
    its output based on its configuration and input cache_ids.

    Steps can produce multiple outputs (e.g., split_on_beats produces N segments).
    Each output has its own cache_id derived from the step's cache_id + index.

    Attributes:
        name: Human-readable name relating to recipe (e.g., "beats.split")
        step_id: Unique identifier (hash) for this step
        node_type: The primitive type (SOURCE, SEQUENCE, TRANSFORM, etc.)
        config: Configuration for the primitive
        input_steps: IDs of steps this depends on (legacy, use inputs for new code)
        inputs: Structured input references with names and sources
        cache_id: Pre-computed cache ID (hash of config + input cache_ids)
        outputs: List of outputs this step produces
        estimated_duration: Optional estimated execution time
        level: Dependency level (0 = no dependencies, higher = more deps)
    """

    step_id: str
    node_type: str
    config: Dict[str, Any]
    input_steps: List[str] = field(default_factory=list)
    inputs: List[StepInput] = field(default_factory=list)
    cache_id: Optional[str] = None
    outputs: List[StepOutput] = field(default_factory=list)
    name: Optional[str] = None
    estimated_duration: Optional[float] = None
    level: int = 0

    def compute_cache_id(self, input_cache_ids: Dict[str, str]) -> str:
        """
        Compute cache ID from configuration and input cache IDs.

        cache_id = SHA3-256(node_type + config + sorted(input_cache_ids))

        Args:
            input_cache_ids: Mapping from input step_id/name to their cache_id

        Returns:
            The computed cache_id (also stored on self.cache_id)
        """
        # Use structured inputs if available, otherwise fall back to input_steps.
        # Sorting makes the hash independent of declaration order; an unresolved
        # input falls back to its source/step_id string so the hash stays stable.
        if self.inputs:
            resolved_inputs = [
                inp.cache_id or input_cache_ids.get(inp.source, inp.source)
                for inp in sorted(self.inputs, key=lambda x: x.name)
            ]
        else:
            resolved_inputs = [input_cache_ids.get(s, s) for s in sorted(self.input_steps)]
        content = {
            "node_type": self.node_type,
            "config": self.config,
            "inputs": resolved_inputs,
        }
        self.cache_id = _stable_hash(content)
        return self.cache_id

    def compute_output_cache_id(self, index: int) -> str:
        """
        Compute cache ID for a specific output index.

        output_cache_id = SHA3-256(step_cache_id + index)

        Args:
            index: The output index

        Returns:
            Cache ID for that output

        Raises:
            ValueError: if compute_cache_id has not been called yet.
        """
        if not self.cache_id:
            raise ValueError("Step cache_id must be computed first")
        content = {"step_cache_id": self.cache_id, "output_index": index}
        return _stable_hash(content)

    def add_output(
        self,
        name: str,
        media_type: str = "application/octet-stream",
        index: Optional[int] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> StepOutput:
        """
        Add an output to this step.

        Requires cache_id to be computed first (see compute_output_cache_id).

        Args:
            name: Human-readable output name
            media_type: MIME type of the output
            index: Output index (defaults to next available)
            metadata: Optional metadata

        Returns:
            The created StepOutput (also appended to self.outputs)
        """
        if index is None:
            index = len(self.outputs)
        cache_id = self.compute_output_cache_id(index)
        output = StepOutput(
            name=name,
            cache_id=cache_id,
            media_type=media_type,
            index=index,
            metadata=metadata or {},
        )
        self.outputs.append(output)
        return output

    def get_output(self, index: int = 0) -> Optional[StepOutput]:
        """Get output by index, or None if out of range."""
        if index < len(self.outputs):
            return self.outputs[index]
        return None

    def get_output_by_name(self, name: str) -> Optional[StepOutput]:
        """Get output by name, or None if no output has that name."""
        for output in self.outputs:
            if output.name == name:
                return output
        return None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict (nested inputs/outputs included)."""
        return {
            "step_id": self.step_id,
            "name": self.name,
            "node_type": self.node_type,
            "config": self.config,
            "input_steps": self.input_steps,
            "inputs": [inp.to_dict() for inp in self.inputs],
            "cache_id": self.cache_id,
            "outputs": [out.to_dict() for out in self.outputs],
            "estimated_duration": self.estimated_duration,
            "level": self.level,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ExecutionStep":
        """Rebuild from a ``to_dict`` payload; optional keys get defaults."""
        inputs = [StepInput.from_dict(i) for i in data.get("inputs", [])]
        outputs = [StepOutput.from_dict(o) for o in data.get("outputs", [])]
        return cls(
            step_id=data["step_id"],
            node_type=data["node_type"],
            config=data.get("config", {}),
            input_steps=data.get("input_steps", []),
            inputs=inputs,
            cache_id=data.get("cache_id"),
            outputs=outputs,
            name=data.get("name"),
            estimated_duration=data.get("estimated_duration"),
            level=data.get("level", 0),
        )

    def to_json(self) -> str:
        """Serialize to a compact JSON string."""
        return json.dumps(self.to_dict())

    @classmethod
    def from_json(cls, json_str: str) -> "ExecutionStep":
        """Rebuild from a JSON string produced by to_json."""
        return cls.from_dict(json.loads(json_str))
@dataclass
class PlanInput:
    """An input to the execution plan.

    Attributes:
        name: Human-readable name from the recipe (e.g. "source_video").
        cache_id: Content hash of the input file.
        cid: Same value as cache_id (kept for clarity at call sites).
        media_type: MIME type of the input.
    """

    name: str
    cache_id: str
    cid: str
    media_type: str = "application/octet-stream"

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict."""
        return {
            attr: getattr(self, attr)
            for attr in ("name", "cache_id", "cid", "media_type")
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PlanInput":
        """Rebuild from a ``to_dict`` payload; cid defaults to cache_id."""
        content_hash = data["cache_id"]
        return cls(
            name=data["name"],
            cache_id=content_hash,
            cid=data.get("cid", content_hash),
            media_type=data.get("media_type", "application/octet-stream"),
        )
@dataclass
class ExecutionPlan:
    """
    Complete execution plan for a recipe.

    Contains all steps in topological order with pre-computed cache IDs.
    The plan is deterministic: same recipe + same inputs = same plan.

    Attributes:
        name: Human-readable plan name from recipe
        plan_id: Hash of the entire plan (for deduplication)
        recipe_id: Source recipe identifier
        recipe_name: Human-readable recipe name
        recipe_hash: Hash of the recipe content
        seed: Random seed used for planning
        steps: List of steps in execution order
        output_step: ID of the final output step
        output_name: Human-readable name of the final output
        inputs: Structured input definitions
        analysis_cache_ids: Cache IDs of analysis results used
        input_hashes: Content hashes of input files (legacy, use inputs)
        created_at: When the plan was generated
        metadata: Optional additional metadata
    """

    plan_id: Optional[str]
    recipe_id: str
    recipe_hash: str
    steps: List[ExecutionStep]
    output_step: str
    name: Optional[str] = None
    recipe_name: Optional[str] = None
    seed: Optional[int] = None
    output_name: Optional[str] = None
    inputs: List[PlanInput] = field(default_factory=list)
    analysis_cache_ids: Dict[str, str] = field(default_factory=dict)
    input_hashes: Dict[str, str] = field(default_factory=dict)
    created_at: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        # Derive timestamp and plan_id when the caller passes None for them.
        if self.created_at is None:
            self.created_at = datetime.now(timezone.utc).isoformat()
        if self.plan_id is None:
            self.plan_id = self._compute_plan_id()

    def _compute_plan_id(self) -> str:
        """Compute plan ID from contents."""
        # step.to_dict() includes each step's cache_id, so the plan_id changes
        # whenever any step's cache_id changes (see compute_all_cache_ids).
        content = {
            "recipe_hash": self.recipe_hash,
            "steps": [s.to_dict() for s in self.steps],
            "input_hashes": self.input_hashes,
            "analysis_cache_ids": self.analysis_cache_ids,
        }
        return _stable_hash(content)

    def compute_all_cache_ids(self) -> None:
        """
        Compute cache IDs for all steps in dependency order.

        Must be called after all steps are added to ensure
        cache IDs propagate correctly through dependencies.

        Raises:
            ValueError: if a step depends on a step_id that is neither in the
                plan nor present in input_hashes.
        """
        # Build step lookup
        step_by_id = {s.step_id: s for s in self.steps}
        # Cache IDs start with input hashes
        cache_ids = dict(self.input_hashes)
        # Process in order (assumes self.steps is already topologically sorted)
        for step in self.steps:
            # For SOURCE steps referencing inputs, use input hash directly:
            # the source's identity IS the input file's content hash.
            if step.node_type == "SOURCE" and step.config.get("input_ref"):
                ref = step.config["input_ref"]
                if ref in self.input_hashes:
                    step.cache_id = self.input_hashes[ref]
                    cache_ids[step.step_id] = step.cache_id
                    continue
            # For other steps, compute from inputs
            input_cache_ids = {}
            for input_step_id in step.input_steps:
                if input_step_id in cache_ids:
                    input_cache_ids[input_step_id] = cache_ids[input_step_id]
                elif input_step_id in step_by_id:
                    # Step should have been processed already (topological order)
                    input_cache_ids[input_step_id] = step_by_id[input_step_id].cache_id
                else:
                    raise ValueError(f"Input step {input_step_id} not found for {step.step_id}")
            step.compute_cache_id(input_cache_ids)
            cache_ids[step.step_id] = step.cache_id
        # Recompute plan_id with final cache IDs
        self.plan_id = self._compute_plan_id()

    def compute_levels(self) -> int:
        """
        Compute dependency levels for all steps.

        Level 0 = no dependencies (can start immediately)
        Level N = depends on steps at level N-1

        Returns:
            Maximum level (number of sequential dependency levels)
        """
        step_by_id = {s.step_id: s for s in self.steps}
        levels = {}

        # Memoized depth-first walk; also writes step.level as a side effect.
        # NOTE(review): recursion depth equals the longest dependency chain —
        # very deep plans could hit the interpreter recursion limit.
        def compute_level(step_id: str) -> int:
            if step_id in levels:
                return levels[step_id]
            step = step_by_id.get(step_id)
            if step is None:
                return 0  # Input from outside the plan
            if not step.input_steps:
                levels[step_id] = 0
                step.level = 0
                return 0
            max_input_level = max(compute_level(s) for s in step.input_steps)
            level = max_input_level + 1
            levels[step_id] = level
            step.level = level
            return level

        for step in self.steps:
            compute_level(step.step_id)
        return max(levels.values()) if levels else 0

    def get_steps_by_level(self) -> Dict[int, List[ExecutionStep]]:
        """
        Group steps by dependency level.

        Steps at the same level can execute in parallel.

        Returns:
            Dict mapping level -> list of steps at that level
        """
        by_level: Dict[int, List[ExecutionStep]] = {}
        for step in self.steps:
            by_level.setdefault(step.level, []).append(step)
        return by_level

    def get_step(self, step_id: str) -> Optional[ExecutionStep]:
        """Get step by ID (linear scan), or None."""
        for step in self.steps:
            if step.step_id == step_id:
                return step
        return None

    def get_step_by_cache_id(self, cache_id: str) -> Optional[ExecutionStep]:
        """Get step by cache ID (linear scan), or None."""
        for step in self.steps:
            if step.cache_id == cache_id:
                return step
        return None

    def get_step_by_name(self, name: str) -> Optional[ExecutionStep]:
        """Get step by human-readable name (linear scan), or None."""
        for step in self.steps:
            if step.name == name:
                return step
        return None

    def get_all_outputs(self) -> Dict[str, StepOutput]:
        """
        Get all outputs from all steps, keyed by output name.

        Returns:
            Dict mapping output name -> StepOutput
            (a later step's output silently wins on duplicate names)
        """
        outputs = {}
        for step in self.steps:
            for output in step.outputs:
                outputs[output.name] = output
        return outputs

    def get_output_cache_ids(self) -> Dict[str, str]:
        """
        Get mapping of output names to cache IDs.

        Returns:
            Dict mapping output name -> cache_id
        """
        return {
            output.name: output.cache_id
            for step in self.steps
            for output in step.outputs
        }

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the whole plan to a JSON-compatible dict."""
        return {
            "plan_id": self.plan_id,
            "name": self.name,
            "recipe_id": self.recipe_id,
            "recipe_name": self.recipe_name,
            "recipe_hash": self.recipe_hash,
            "seed": self.seed,
            "inputs": [i.to_dict() for i in self.inputs],
            "steps": [s.to_dict() for s in self.steps],
            "output_step": self.output_step,
            "output_name": self.output_name,
            "analysis_cache_ids": self.analysis_cache_ids,
            "input_hashes": self.input_hashes,
            "created_at": self.created_at,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ExecutionPlan":
        """Rebuild a plan from a ``to_dict`` payload."""
        inputs = [PlanInput.from_dict(i) for i in data.get("inputs", [])]
        return cls(
            plan_id=data.get("plan_id"),
            name=data.get("name"),
            recipe_id=data["recipe_id"],
            recipe_name=data.get("recipe_name"),
            recipe_hash=data["recipe_hash"],
            seed=data.get("seed"),
            inputs=inputs,
            steps=[ExecutionStep.from_dict(s) for s in data.get("steps", [])],
            output_step=data["output_step"],
            output_name=data.get("output_name"),
            analysis_cache_ids=data.get("analysis_cache_ids", {}),
            input_hashes=data.get("input_hashes", {}),
            created_at=data.get("created_at"),
            metadata=data.get("metadata", {}),
        )

    def to_json(self, indent: int = 2) -> str:
        """Serialize to a JSON string."""
        return json.dumps(self.to_dict(), indent=indent)

    @classmethod
    def from_json(cls, json_str: str) -> "ExecutionPlan":
        """Rebuild from a JSON string produced by to_json."""
        return cls.from_dict(json.loads(json_str))

    def summary(self) -> str:
        """Get a human-readable summary of the plan."""
        by_level = self.get_steps_by_level()
        max_level = max(by_level.keys()) if by_level else 0
        lines = [
            f"Execution Plan: {self.plan_id[:16]}...",
            f"Recipe: {self.recipe_id}",
            f"Steps: {len(self.steps)}",
            f"Levels: {max_level + 1}",
            "",
        ]
        for level in sorted(by_level.keys()):
            steps = by_level[level]
            lines.append(f"Level {level}: ({len(steps)} steps, can run in parallel)")
            for step in steps:
                cache_status = f"[{step.cache_id[:8]}...]" if step.cache_id else "[no cache_id]"
                lines.append(f"  - {step.step_id}: {step.node_type} {cache_status}")
        return "\n".join(lines)

View File

@@ -0,0 +1,231 @@
# artdag/planning/tree_reduction.py
"""
Tree reduction for parallel composition.
Instead of sequential pairwise composition:
A → AB → ABC → ABCD (3 sequential steps)
Use parallel tree reduction:
A ─┬─ AB ─┬─ ABCD
B ─┘ │
C ─┬─ CD ─┘
D ─┘
This reduces O(N) to O(log N) levels of sequential dependency.
"""
import math
from dataclasses import dataclass
from typing import List, Tuple, Any, Dict
@dataclass
class ReductionNode:
    """One pairwise composition in the reduction tree."""

    node_id: str          # generated id, e.g. "reduce_L0_P1" or "reduce_final"
    input_ids: List[str]  # ids of the two operands being combined
    level: int            # tree level; 0 is the first round of pairing
    position: int         # position within the level


class TreeReducer:
    """
    Builds balanced reduction trees for parallel composition.

    Converts N inputs into pairwise composition operations arranged so that
    only O(log N) levels must run sequentially (vs O(N) for a left-fold).
    """

    def __init__(self, node_type: str = "SEQUENCE"):
        """
        Args:
            node_type: The composition node type (SEQUENCE, AUDIO_MIX, etc.)
        """
        self.node_type = node_type

    def reduce(
        self,
        input_ids: List[str],
        id_prefix: str = "reduce",
    ) -> Tuple[List[ReductionNode], str]:
        """
        Build the reduction tree for *input_ids*.

        Args:
            input_ids: Input step IDs to reduce (must be non-empty).
            id_prefix: Prefix for generated node IDs.

        Returns:
            (nodes in creation order, id of the final/root node). The root
            node is named ``{id_prefix}_final``; a single input is returned
            unchanged with no nodes.

        Raises:
            ValueError: if input_ids is empty.
        """
        count = len(input_ids)
        if count == 0:
            raise ValueError("Cannot reduce empty input list")
        if count == 1:
            # Nothing to combine.
            return [], input_ids[0]
        if count == 2:
            final_id = f"{id_prefix}_final"
            node = ReductionNode(
                node_id=final_id,
                input_ids=input_ids,
                level=0,
                position=0,
            )
            return [node], final_id

        created: List[ReductionNode] = []
        pending = list(input_ids)
        depth = 0
        # Each pass pairs adjacent entries; an odd trailing entry is carried
        # up unchanged to the next level.
        while len(pending) > 1:
            carried: List[str] = []
            for slot, start in enumerate(range(0, len(pending), 2)):
                pair = pending[start:start + 2]
                if len(pair) == 2:
                    node = ReductionNode(
                        node_id=f"{id_prefix}_L{depth}_P{slot}",
                        input_ids=pair,
                        level=depth,
                        position=slot,
                    )
                    created.append(node)
                    carried.append(node.node_id)
                else:
                    carried.append(pair[0])
            pending = carried
            depth += 1

        output_id = pending[0]
        # Give the root a stable, recognizable name.
        if created and created[-1].node_id == output_id:
            created[-1].node_id = f"{id_prefix}_final"
            output_id = f"{id_prefix}_final"
        return created, output_id

    def get_reduction_depth(self, n: int) -> int:
        """
        Number of sequential reduction levels for *n* inputs.

        Returns:
            ceil(log2(n)); 0 for n <= 1.
        """
        return 0 if n <= 1 else math.ceil(math.log2(n))

    def get_total_operations(self, n: int) -> int:
        """
        Total composition operations for *n* inputs (always n - 1, min 0).
        """
        return n - 1 if n > 1 else 0

    def reduce_with_config(
        self,
        input_ids: List[str],
        base_config: Dict[str, Any],
        id_prefix: str = "reduce",
    ) -> Tuple[List[Tuple[ReductionNode, Dict[str, Any]]], str]:
        """
        Like :meth:`reduce`, pairing each node with its own copy of
        *base_config* so callers can mutate per-node settings safely.

        Returns:
            (list of (node, config-copy) pairs, final output ID)
        """
        plan, final_id = self.reduce(input_ids, id_prefix)
        return [(node, dict(base_config)) for node in plan], final_id
def reduce_sequence(
    input_ids: List[str],
    transition_config: Dict[str, Any] = None,
    id_prefix: str = "seq",
) -> Tuple[List[Tuple[str, List[str], Dict[str, Any]]], str]:
    """
    Tree-reduce *input_ids* with SEQUENCE composition.

    Args:
        input_ids: Input step IDs to sequence.
        transition_config: Transition configuration; defaults to a hard cut.
        id_prefix: Prefix for generated step IDs.

    Returns:
        (list of (step_id, input_ids, config-copy) triples, final step ID)
    """
    config = (
        transition_config
        if transition_config is not None
        else {"transition": {"type": "cut"}}
    )
    nodes, final_id = TreeReducer("SEQUENCE").reduce(input_ids, id_prefix)
    plan = [(node.node_id, node.input_ids, dict(config)) for node in nodes]
    return plan, final_id
def reduce_audio_mix(
    input_ids: List[str],
    mix_config: Dict[str, Any] = None,
    id_prefix: str = "mix",
) -> Tuple[List[Tuple[str, List[str], Dict[str, Any]]], str]:
    """
    Tree-reduce *input_ids* with AUDIO_MIX composition.

    Args:
        input_ids: Input step IDs to mix.
        mix_config: Mix configuration; defaults to normalized mixing.
        id_prefix: Prefix for generated step IDs.

    Returns:
        (list of (step_id, input_ids, config-copy) triples, final step ID)
    """
    config = mix_config if mix_config is not None else {"normalize": True}
    nodes, final_id = TreeReducer("AUDIO_MIX").reduce(input_ids, id_prefix)
    plan = [(node.node_id, node.input_ids, dict(config)) for node in nodes]
    return plan, final_id

View File

@@ -0,0 +1,20 @@
# primitive/registry/__init__.py
"""
Art DAG Registry.
The registry is the foundational data structure that maps named assets
to their source paths or content-addressed IDs. Assets in the registry
can be referenced by DAGs.
Example:
registry = Registry("/path/to/registry")
registry.add("cat", "/path/to/cat.jpg", tags=["animal", "photo"])
# Later, in a DAG:
builder = DAGBuilder()
cat = builder.source(registry.get("cat").path)
"""
from .registry import Registry, Asset
__all__ = ["Registry", "Asset"]

View File

@@ -0,0 +1,294 @@
# primitive/registry/registry.py
"""
Asset registry for the Art DAG.
The registry stores named assets with metadata, enabling:
- Named references to source files
- Tagging and categorization
- Content-addressed deduplication
- Asset discovery and search
"""
import hashlib
import json
import shutil
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
def _file_hash(path: Path, algorithm: str = "sha3_256") -> str:
"""
Compute content hash of a file.
Uses SHA-3 (Keccak) by default for quantum resistance.
SHA-3-256 provides 128-bit security against quantum attacks (Grover's algorithm).
Args:
path: File to hash
algorithm: Hash algorithm (sha3_256, sha3_512, sha256, blake2b)
Returns:
Full hex digest (no truncation)
"""
hasher = hashlib.new(algorithm)
with open(path, "rb") as f:
for chunk in iter(lambda: f.read(65536), b""):
hasher.update(chunk)
return hasher.hexdigest()
@dataclass
class Asset:
    """
    A registered asset in the Art DAG.

    The cid (SHA-3-256 content hash) is the true identifier; url and
    local_path are merely locations where that content can be fetched.

    Attributes:
        name: Unique name for the asset
        cid: SHA-3-256 hash - the canonical identifier
        url: Public URL (canonical location)
        local_path: Optional local path (for local execution)
        asset_type: Type of asset (image, video, audio, etc.)
        tags: List of tags for categorization
        metadata: Additional metadata (dimensions, duration, etc.)
        created_at: Timestamp when added to registry
    """

    name: str
    cid: str
    url: Optional[str] = None
    local_path: Optional[Path] = None
    asset_type: str = "unknown"
    tags: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
    created_at: float = field(default_factory=time.time)

    @property
    def path(self) -> Optional[Path]:
        """Alias for local_path, kept for backwards compatibility."""
        return self.local_path

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict; unset locations are omitted."""
        serialized: Dict[str, Any] = {
            "name": self.name,
            "cid": self.cid,
            "asset_type": self.asset_type,
            "tags": self.tags,
            "metadata": self.metadata,
            "created_at": self.created_at,
        }
        if self.url:
            serialized["url"] = self.url
        if self.local_path:
            serialized["local_path"] = str(self.local_path)
        return serialized

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Asset":
        """Rebuild from a ``to_dict`` payload."""
        # Older registries stored the location under "path".
        stored_path = data.get("local_path") or data.get("path")
        return cls(
            name=data["name"],
            cid=data["cid"],
            url=data.get("url"),
            local_path=Path(stored_path) if stored_path else None,
            asset_type=data.get("asset_type", "unknown"),
            tags=data.get("tags", []),
            metadata=data.get("metadata", {}),
            created_at=data.get("created_at", time.time()),
        )
class Registry:
    """
    The Art DAG registry.

    Stores named assets that can be referenced by DAGs.

    Structure:
        registry_dir/
            registry.json   # Index of all assets
            assets/         # Optional: copied asset files
                <hash>/
                    <filename>
    """

    def __init__(self, registry_dir: Path | str, copy_assets: bool = False):
        """
        Initialize the registry.

        Args:
            registry_dir: Directory to store registry data (created if missing)
            copy_assets: If True, copy assets into registry (content-addressed)
        """
        # NOTE(review): copy_assets is stored but nothing in this class copies
        # files into _assets_dir() — confirm whether copying is implemented
        # elsewhere or still TODO (shutil is imported but unused too).
        self.registry_dir = Path(registry_dir)
        self.registry_dir.mkdir(parents=True, exist_ok=True)
        self.copy_assets = copy_assets
        self._assets: Dict[str, Asset] = {}
        self._load()

    def _index_path(self) -> Path:
        # Location of the JSON index file.
        return self.registry_dir / "registry.json"

    def _assets_dir(self) -> Path:
        # Location for copied asset payloads (currently never written to).
        return self.registry_dir / "assets"

    def _load(self) -> None:
        """Load registry from disk (no-op if the index does not exist yet)."""
        index_path = self._index_path()
        if index_path.exists():
            with open(index_path) as f:
                data = json.load(f)
            self._assets = {
                name: Asset.from_dict(asset_data)
                for name, asset_data in data.get("assets", {}).items()
            }

    def _save(self) -> None:
        """Save registry to disk."""
        # NOTE(review): this write is not atomic — a crash mid-write can leave
        # registry.json corrupted; consider write-to-temp + os.replace.
        data = {
            "version": "1.0",
            "assets": {name: asset.to_dict() for name, asset in self._assets.items()},
        }
        with open(self._index_path(), "w") as f:
            json.dump(data, f, indent=2)

    def add(
        self,
        name: str,
        cid: str,
        url: Optional[str] = None,
        local_path: Path | str | None = None,
        asset_type: Optional[str] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Asset:
        """
        Add an asset to the registry (overwrites any existing asset of the
        same name) and persist the index immediately.

        Args:
            name: Unique name for the asset
            cid: SHA-3-256 hash of the content (the canonical identifier)
            url: Public URL where the asset can be fetched
            local_path: Optional local path (for local execution)
            asset_type: Type of asset (image, video, audio, etc.);
                auto-detected from the file extension when omitted
            tags: List of tags for categorization
            metadata: Additional metadata

        Returns:
            The created Asset
        """
        # Auto-detect asset type from URL or path extension
        if asset_type is None:
            ext = None
            if url:
                # Strip query string before reading the extension.
                ext = Path(url.split("?")[0]).suffix.lower()
            elif local_path:
                ext = Path(local_path).suffix.lower()
            if ext:
                type_map = {
                    ".jpg": "image", ".jpeg": "image", ".png": "image",
                    ".gif": "image", ".webp": "image", ".bmp": "image",
                    ".mp4": "video", ".mkv": "video", ".avi": "video",
                    ".mov": "video", ".webm": "video",
                    ".mp3": "audio", ".wav": "audio", ".flac": "audio",
                    ".ogg": "audio", ".aac": "audio",
                }
                asset_type = type_map.get(ext, "unknown")
            else:
                asset_type = "unknown"
        asset = Asset(
            name=name,
            cid=cid,
            url=url,
            local_path=Path(local_path).resolve() if local_path else None,
            asset_type=asset_type,
            tags=tags or [],
            metadata=metadata or {},
        )
        self._assets[name] = asset
        self._save()
        return asset

    def add_from_file(
        self,
        name: str,
        path: Path | str,
        url: Optional[str] = None,
        asset_type: Optional[str] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Asset:
        """
        Add an asset from a local file (computes hash automatically).

        Args:
            name: Unique name for the asset
            path: Path to the source file
            url: Optional public URL
            asset_type: Type of asset (auto-detected if not provided)
            tags: List of tags for categorization
            metadata: Additional metadata

        Returns:
            The created Asset

        Raises:
            FileNotFoundError: if *path* does not exist
        """
        path = Path(path)
        if not path.exists():
            raise FileNotFoundError(f"Asset file not found: {path}")
        cid = _file_hash(path)
        return self.add(
            name=name,
            cid=cid,
            url=url,
            local_path=path,
            asset_type=asset_type,
            tags=tags,
            metadata=metadata,
        )

    def get(self, name: str) -> Optional[Asset]:
        """Get an asset by name, or None if unknown."""
        return self._assets.get(name)

    def remove(self, name: str) -> bool:
        """Remove an asset from the registry; returns False if not present."""
        if name not in self._assets:
            return False
        del self._assets[name]
        self._save()
        return True

    # NOTE: shadows the builtin `list` inside the class namespace; kept for API
    # compatibility.
    def list(self) -> List[Asset]:
        """List all assets."""
        return list(self._assets.values())

    def find_by_tag(self, tag: str) -> List[Asset]:
        """Find assets with a specific tag (linear scan)."""
        return [a for a in self._assets.values() if tag in a.tags]

    def find_by_type(self, asset_type: str) -> List[Asset]:
        """Find assets of a specific type (linear scan)."""
        return [a for a in self._assets.values() if a.asset_type == asset_type]

    def find_by_hash(self, cid: str) -> Optional[Asset]:
        """Find an asset by content hash (linear scan), or None."""
        for asset in self._assets.values():
            if asset.cid == cid:
                return asset
        return None

    def __contains__(self, name: str) -> bool:
        # Membership is by asset *name*, not by cid.
        return name in self._assets

    def __len__(self) -> int:
        return len(self._assets)

    def __iter__(self):
        # Iterates Asset objects (values), mirroring list().
        return iter(self._assets.values())

253
core/artdag/server.py Normal file
View File

@@ -0,0 +1,253 @@
# primitive/server.py
"""
HTTP server for primitive execution engine.
Provides a REST API for submitting DAGs and retrieving results.
Endpoints:
POST /execute - Submit DAG for execution
GET /status/:id - Get execution status
GET /result/:id - Get execution result
GET /cache/stats - Get cache statistics
DELETE /cache - Clear cache
"""
import json
import logging
import threading
import uuid
from dataclasses import dataclass, field
from http.server import HTTPServer, BaseHTTPRequestHandler
from pathlib import Path
from typing import Any, Dict, Optional
from urllib.parse import urlparse
from .dag import DAG
from .engine import Engine, ExecutionResult
from . import nodes # Register built-in executors
logger = logging.getLogger(__name__)
@dataclass
class Job:
    """A pending or completed execution job."""

    job_id: str                              # short uuid4 prefix assigned at submission
    dag: DAG                                 # the DAG to execute
    status: str = "pending"  # pending, running, completed, failed
    result: Optional[ExecutionResult] = None  # populated when execution finishes
    error: Optional[str] = None               # populated on failure
class PrimitiveServer:
    """
    HTTP server for the primitive engine.

    Jobs execute asynchronously on daemon threads; job state is shared
    between the HTTP handler threads and worker threads under self._lock.

    Usage:
        server = PrimitiveServer(cache_dir="/tmp/primitive_cache", port=8080)
        server.start()  # Blocking
    """

    def __init__(self, cache_dir: Path | str, host: str = "127.0.0.1", port: int = 8080):
        """
        Args:
            cache_dir: Directory for the engine's content cache
            host: Interface to bind to (loopback by default)
            port: TCP port to listen on
        """
        self.cache_dir = Path(cache_dir)
        self.host = host
        self.port = port
        self.engine = Engine(self.cache_dir)
        self.jobs: Dict[str, Job] = {}
        self._lock = threading.Lock()  # guards self.jobs and Job mutations

    def submit_job(self, dag: DAG) -> str:
        """Submit a DAG for execution, return job ID."""
        # 8-char uuid4 prefix; collision chance is negligible for the expected
        # in-memory job count.
        job_id = str(uuid.uuid4())[:8]
        job = Job(job_id=job_id, dag=dag)
        with self._lock:
            self.jobs[job_id] = job
        # Execute in background thread (daemon: won't block process exit)
        thread = threading.Thread(target=self._execute_job, args=(job_id,))
        thread.daemon = True
        thread.start()
        return job_id

    def _execute_job(self, job_id: str):
        """Execute a job in background."""
        with self._lock:
            job = self.jobs.get(job_id)
            if not job:
                return
            job.status = "running"
        try:
            # Engine call happens outside the lock so other requests aren't
            # blocked for the duration of execution.
            result = self.engine.execute(job.dag)
            with self._lock:
                job.result = result
                job.status = "completed" if result.success else "failed"
                if not result.success:
                    job.error = result.error
        except Exception as e:
            logger.exception(f"Job {job_id} failed")
            with self._lock:
                job.status = "failed"
                job.error = str(e)

    def get_job(self, job_id: str) -> Optional[Job]:
        """Get job by ID."""
        with self._lock:
            return self.jobs.get(job_id)

    # NOTE: this is an instance method whose "self" parameter is deliberately
    # named server_instance, so the nested handler class can close over it
    # without shadowing the handler's own `self`.
    def _create_handler(server_instance):
        """Create request handler with access to server instance."""

        class RequestHandler(BaseHTTPRequestHandler):
            server_ref = server_instance  # the owning PrimitiveServer

            def log_message(self, format, *args):
                # Route http.server's default stderr logging through our logger.
                logger.debug(format % args)

            def _send_json(self, data: Any, status: int = 200):
                # Serialize `data` and send it with the given HTTP status.
                self.send_response(status)
                self.send_header("Content-Type", "application/json")
                self.end_headers()
                self.wfile.write(json.dumps(data).encode())

            def _send_error(self, message: str, status: int = 400):
                self._send_json({"error": message}, status)

            def do_GET(self):
                parsed = urlparse(self.path)
                path = parsed.path
                if path.startswith("/status/"):
                    job_id = path[8:]  # strip "/status/"
                    job = self.server_ref.get_job(job_id)
                    if not job:
                        self._send_error("Job not found", 404)
                        return
                    # NOTE(review): job fields are read without holding the
                    # server lock here — a worker may update them concurrently.
                    self._send_json({
                        "job_id": job.job_id,
                        "status": job.status,
                        "error": job.error,
                    })
                elif path.startswith("/result/"):
                    job_id = path[8:]  # strip "/result/"
                    job = self.server_ref.get_job(job_id)
                    if not job:
                        self._send_error("Job not found", 404)
                        return
                    if job.status == "pending" or job.status == "running":
                        # Not done yet: report readiness without result fields.
                        self._send_json({
                            "job_id": job.job_id,
                            "status": job.status,
                            "ready": False,
                        })
                        return
                    result = job.result
                    self._send_json({
                        "job_id": job.job_id,
                        "status": job.status,
                        "ready": True,
                        "success": result.success if result else False,
                        "output_path": str(result.output_path) if result and result.output_path else None,
                        "error": job.error,
                        "execution_time": result.execution_time if result else 0,
                        "nodes_executed": result.nodes_executed if result else 0,
                        "nodes_cached": result.nodes_cached if result else 0,
                    })
                elif path == "/cache/stats":
                    stats = self.server_ref.engine.get_cache_stats()
                    self._send_json({
                        "total_entries": stats.total_entries,
                        "total_size_bytes": stats.total_size_bytes,
                        "hits": stats.hits,
                        "misses": stats.misses,
                        "hit_rate": stats.hit_rate,
                    })
                elif path == "/health":
                    self._send_json({"status": "ok"})
                else:
                    self._send_error("Not found", 404)

            def do_POST(self):
                if self.path == "/execute":
                    try:
                        content_length = int(self.headers.get("Content-Length", 0))
                        body = self.rfile.read(content_length).decode()
                        data = json.loads(body)
                        dag = DAG.from_dict(data)
                        job_id = self.server_ref.submit_job(dag)
                        self._send_json({
                            "job_id": job_id,
                            "status": "pending",
                        })
                    except json.JSONDecodeError as e:
                        self._send_error(f"Invalid JSON: {e}")
                    except Exception as e:
                        self._send_error(str(e), 500)
                else:
                    self._send_error("Not found", 404)

            def do_DELETE(self):
                if self.path == "/cache":
                    self.server_ref.engine.clear_cache()
                    self._send_json({"status": "cleared"})
                else:
                    self._send_error("Not found", 404)

        return RequestHandler

    def start(self):
        """Start the HTTP server (blocking until KeyboardInterrupt)."""
        handler = self._create_handler()
        server = HTTPServer((self.host, self.port), handler)
        logger.info(f"Primitive server starting on {self.host}:{self.port}")
        print(f"Primitive server running on http://{self.host}:{self.port}")
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            print("\nShutting down...")
            server.shutdown()

    def start_background(self) -> threading.Thread:
        """Start the server in a background daemon thread and return it."""
        thread = threading.Thread(target=self.start)
        thread.daemon = True
        thread.start()
        return thread
def main():
    """CLI entry point: parse arguments, configure logging, run the server."""
    import argparse

    parser = argparse.ArgumentParser(description="Primitive execution server")
    parser.add_argument("--host", default="127.0.0.1", help="Host to bind to")
    parser.add_argument("--port", type=int, default=8080, help="Port to bind to")
    parser.add_argument("--cache-dir", default="/tmp/primitive_cache", help="Cache directory")
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose logging")
    opts = parser.parse_args()

    log_level = logging.DEBUG if opts.verbose else logging.INFO
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    )

    # Blocks until interrupted.
    PrimitiveServer(
        cache_dir=opts.cache_dir,
        host=opts.host,
        port=opts.port,
    ).start()
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,75 @@
"""
S-expression parsing, compilation, and planning for ArtDAG.
This module provides:
- parser: Parse S-expression text into Python data structures
- compiler: Compile recipe S-expressions into DAG format
- planner: Generate execution plans from recipes
"""
from .parser import (
parse,
parse_all,
serialize,
Symbol,
Keyword,
ParseError,
)
from .compiler import (
compile_recipe,
compile_string,
CompiledRecipe,
CompileError,
ParamDef,
_parse_params,
)
from .planner import (
create_plan,
ExecutionPlanSexp,
PlanStep,
step_to_task_sexp,
task_cache_id,
)
from .scheduler import (
PlanScheduler,
PlanResult,
StepResult,
schedule_plan,
step_to_sexp,
step_sexp_to_string,
verify_step_cache_id,
)
__all__ = [
# Parser
'parse',
'parse_all',
'serialize',
'Symbol',
'Keyword',
'ParseError',
# Compiler
'compile_recipe',
'compile_string',
'CompiledRecipe',
'CompileError',
'ParamDef',
'_parse_params',
# Planner
'create_plan',
'ExecutionPlanSexp',
'PlanStep',
'step_to_task_sexp',
'task_cache_id',
# Scheduler
'PlanScheduler',
'PlanResult',
'StepResult',
'schedule_plan',
'step_to_sexp',
'step_sexp_to_string',
'verify_step_cache_id',
]

2463
core/artdag/sexp/compiler.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,337 @@
"""
Sexp effect loader.
Loads sexp effect definitions (define-effect forms) and creates
frame processors that evaluate the sexp body with primitives.
Effects must use :params syntax:
(define-effect name
:params (
(param1 :type int :default 8 :range [4 32] :desc "description")
(param2 :type string :default "value" :desc "description")
)
body)
For effects with no parameters, use empty :params ():
(define-effect name
:params ()
body)
Unknown parameters passed to effects will raise an error.
Usage:
loader = SexpEffectLoader()
effect_fn = loader.load_effect_file(Path("effects/ascii_art.sexp"))
output = effect_fn(input_path, output_path, config)
"""
import logging
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional
import numpy as np
from .parser import parse_all, Symbol, Keyword
from .evaluator import evaluate
from .primitives import PRIMITIVES
from .compiler import ParamDef, _parse_params, CompileError
logger = logging.getLogger(__name__)
def _parse_define_effect(sexp) -> tuple:
    """
    Parse a define-effect form.
    Required syntax:
        (define-effect name
          :params (
            (param1 :type int :default 8 :range [4 32] :desc "description")
          )
          body)
    Effects MUST use :params syntax. Legacy ((param default) ...) syntax is not supported.
    Returns (name, params_with_defaults, param_defs, body)
    where param_defs is a list of ParamDef objects

    Raises:
        ValueError: on a malformed form, legacy parameter syntax, a missing
            body, or a missing :params block.
    """
    if not isinstance(sexp, list) or len(sexp) < 3:
        raise ValueError(f"Invalid define-effect form: {sexp}")
    head = sexp[0]
    if not (isinstance(head, Symbol) and head.name == "define-effect"):
        raise ValueError(f"Expected define-effect, got {head}")
    name = sexp[1]
    if isinstance(name, Symbol):
        # Effect names are stored as plain strings.
        name = name.name
    params_with_defaults = {}
    param_defs: List[ParamDef] = []
    body = None
    found_params = False
    # Parse :params and body
    i = 2
    while i < len(sexp):
        item = sexp[i]
        if isinstance(item, Keyword) and item.name == "params":
            # :params syntax
            if i + 1 >= len(sexp):
                raise ValueError(f"Effect '{name}': Missing params list after :params keyword")
            try:
                param_defs = _parse_params(sexp[i + 1])
                # Build params_with_defaults from ParamDef objects
                for pd in param_defs:
                    params_with_defaults[pd.name] = pd.default
            except CompileError as e:
                raise ValueError(f"Effect '{name}': Error parsing :params: {e}")
            found_params = True
            i += 2
        elif isinstance(item, Keyword):
            # Skip other keywords we don't recognize (keyword plus its value).
            i += 2
        elif body is None:
            # First non-keyword item is the body
            if isinstance(item, list) and item:
                first_elem = item[0]
                # Check for legacy syntax and reject it: a leading
                # (name default) pair is the old parameter-list shape.
                if isinstance(first_elem, list) and len(first_elem) >= 2:
                    raise ValueError(
                        f"Effect '{name}': Legacy parameter syntax ((name default) ...) is not supported. "
                        f"Use :params block instead:\n"
                        f" :params (\n"
                        f" (param_name :type int :default 0 :desc \"description\")\n"
                        f" )"
                    )
            body = item
            i += 1
        else:
            # Extra trailing items after the body are ignored.
            i += 1
    if body is None:
        raise ValueError(f"Effect '{name}': No body found")
    if not found_params:
        raise ValueError(
            f"Effect '{name}': Missing :params block. Effects must declare parameters.\n"
            f"For effects with no parameters, use empty :params ():\n"
            f" (define-effect {name}\n"
            f" :params ()\n"
            f" body)"
        )
    return name, params_with_defaults, param_defs, body
def _create_process_frame(
    effect_name: str,
    params_with_defaults: Dict[str, Any],
    param_defs: List[ParamDef],
    body: Any,
) -> Callable:
    """
    Create a process_frame function that evaluates the sexp body.
    The function signature is: (frame, params, state) -> (frame, state)

    The returned closure rebuilds its evaluation environment on every frame:
    PRIMITIVES, a handful of math/list helpers, the current frame, and the
    effect's parameters (defaults overridden by per-call params).
    """
    import math
    def process_frame(frame: np.ndarray, params: Dict[str, Any], state: Any):
        """Evaluate sexp effect body on a frame."""
        # Build environment with primitives
        env = dict(PRIMITIVES)
        # Add math functions
        # (floor/ceil/round intentionally coerce to int here, unlike math.*)
        env["floor"] = lambda x: int(math.floor(x))
        env["ceil"] = lambda x: int(math.ceil(x))
        env["round"] = lambda x: int(round(x))
        env["abs"] = abs
        env["min"] = min
        env["max"] = max
        env["sqrt"] = math.sqrt
        env["sin"] = math.sin
        env["cos"] = math.cos
        # Add list operations
        env["list"] = lambda *args: tuple(args)
        env["nth"] = lambda coll, i: coll[int(i)] if coll else None
        # Bind frame
        env["frame"] = frame
        # Validate that all provided params are known
        known_params = set(params_with_defaults.keys())
        for k in params.keys():
            if k not in known_params:
                raise ValueError(
                    f"Effect '{effect_name}': Unknown parameter '{k}'. "
                    f"Valid parameters are: {', '.join(sorted(known_params)) if known_params else '(none)'}"
                )
        # Bind parameters (defaults + overrides from config)
        for param_name, default in params_with_defaults.items():
            # Use config value if provided, otherwise default
            if param_name in params:
                env[param_name] = params[param_name]
            # NOTE(review): a parameter whose default is None is left unbound;
            # referencing it in the body would then fail. Confirm intended.
            elif default is not None:
                env[param_name] = default
        # Evaluate the body
        try:
            result = evaluate(body, env)
            if isinstance(result, np.ndarray):
                return result, state
            else:
                # Non-ndarray results are discarded; the input frame passes through.
                logger.warning(f"Effect {effect_name} returned {type(result)}, expected ndarray")
                return frame, state
        except Exception as e:
            logger.error(f"Error evaluating effect {effect_name}: {e}")
            raise
    return process_frame
def load_sexp_effect(source: str, base_path: Optional[Path] = None) -> tuple:
    """
    Load a sexp effect from source code.
    Args:
        source: Sexp source code
        base_path: Base path for resolving relative imports
    Returns:
        (effect_name, process_frame_fn, params_with_defaults, param_defs)
        where param_defs is a list of ParamDef objects for introspection
    Raises:
        ValueError: If no define-effect form is present.
    """
    exprs = parse_all(source)
    # Find the first top-level (define-effect ...) form.
    define_effect = None
    if isinstance(exprs, list):
        for expr in exprs:
            if isinstance(expr, list) and expr and isinstance(expr[0], Symbol):
                if expr[0].name == "define-effect":
                    define_effect = expr
                    break
    # BUG FIX: a former `elif isinstance(exprs, list) and ...` branch here was
    # unreachable (it re-tested the same condition as the `if` above) and has
    # been removed.
    if not define_effect:
        raise ValueError("No define-effect form found in sexp effect")
    name, params_with_defaults, param_defs, body = _parse_define_effect(define_effect)
    process_frame = _create_process_frame(name, params_with_defaults, param_defs, body)
    return name, process_frame, params_with_defaults, param_defs
def load_sexp_effect_file(path: Path) -> tuple:
    """
    Load a sexp effect definition from a .sexp file on disk.
    Returns:
        (effect_name, process_frame_fn, params_with_defaults, param_defs)
        where param_defs is a list of ParamDef objects for introspection
    """
    return load_sexp_effect(path.read_text(), base_path=path.parent)
class SexpEffectLoader:
    """
    Loader for sexp effect definitions.
    Creates effect functions compatible with the EffectExecutor.
    """
    def __init__(self, recipe_dir: Optional[Path] = None):
        """
        Initialize loader.
        Args:
            recipe_dir: Base directory for resolving relative effect paths
        """
        self.recipe_dir = recipe_dir or Path.cwd()
        # Cache loaded effects with their param_defs for introspection:
        # maps effect_path -> (name, params_with_defaults, param_defs)
        self._loaded_effects: Dict[str, tuple] = {}
    def load_effect_path(self, effect_path: str) -> Callable:
        """
        Load a sexp effect from a relative path.
        Args:
            effect_path: Relative path to effect .sexp file
        Returns:
            Effect function (input_path, output_path, config) -> output_path
        Raises:
            FileNotFoundError: If the .sexp file does not exist.
        """
        from ..effects.frame_processor import process_video
        full_path = self.recipe_dir / effect_path
        if not full_path.exists():
            raise FileNotFoundError(f"Sexp effect not found: {full_path}")
        name, process_frame_fn, params_defaults, param_defs = load_sexp_effect_file(full_path)
        logger.info(f"Loaded sexp effect: {name} from {effect_path}")
        # Cache for introspection
        self._loaded_effects[effect_path] = (name, params_defaults, param_defs)
        def effect_fn(input_path: Path, output_path: Path, config: Dict[str, Any]) -> Path:
            """Run sexp effect via frame processor."""
            # Extract params (excluding internal keys)
            params = dict(params_defaults)  # Start with defaults
            for k, v in config.items():
                if k not in ("effect", "cid", "hash", "effect_path", "_binding"):
                    params[k] = v
            # Get bindings if present: config values carrying a truthy
            # "_resolved_values" entry are treated as analysis bindings.
            bindings = {}
            for key, value in config.items():
                if isinstance(value, dict) and value.get("_resolved_values"):
                    bindings[key] = value["_resolved_values"]
            output_path.parent.mkdir(parents=True, exist_ok=True)
            # Output is always written as .mp4, so the returned path may
            # differ from the requested output_path's suffix.
            actual_output = output_path.with_suffix(".mp4")
            process_video(
                input_path=input_path,
                output_path=actual_output,
                process_frame=process_frame_fn,
                params=params,
                bindings=bindings,
            )
            logger.info(f"Processed sexp effect '{name}' from {effect_path}")
            return actual_output
        return effect_fn
    def get_effect_params(self, effect_path: str) -> List[ParamDef]:
        """
        Get parameter definitions for an effect.
        Args:
            effect_path: Relative path to effect .sexp file
        Returns:
            List of ParamDef objects describing the effect's parameters
        Raises:
            FileNotFoundError: If the .sexp file does not exist.
        """
        if effect_path not in self._loaded_effects:
            # Load the effect to get its params
            full_path = self.recipe_dir / effect_path
            if not full_path.exists():
                raise FileNotFoundError(f"Sexp effect not found: {full_path}")
            name, _, params_defaults, param_defs = load_sexp_effect_file(full_path)
            self._loaded_effects[effect_path] = (name, params_defaults, param_defs)
        return self._loaded_effects[effect_path][2]
def get_sexp_effect_loader(recipe_dir: Optional[Path] = None) -> SexpEffectLoader:
    """Construct a SexpEffectLoader rooted at recipe_dir (cwd when omitted)."""
    loader = SexpEffectLoader(recipe_dir)
    return loader

View File

@@ -0,0 +1,869 @@
"""
Expression evaluator for S-expression DSL.
Supports:
- Arithmetic: +, -, *, /, mod, sqrt, pow, abs, floor, ceil, round, min, max, clamp
- Comparison: =, <, >, <=, >=
- Logic: and, or, not
- Predicates: odd?, even?, zero?, nil?
- Conditionals: if, cond, case
- Data: list, dict/map construction, get
- Lambda calls
"""
from typing import Any, Dict, List, Callable
from .parser import Symbol, Keyword, Lambda, Binding
class EvalError(Exception):
    """Raised when S-expression evaluation fails."""
# Registry of built-in functions exposed to evaluated expressions.
BUILTINS: Dict[str, Callable] = {}


def builtin(name: str):
    """Decorator that registers the wrapped function in BUILTINS under *name*."""
    def register(fn):
        BUILTINS[name] = fn
        return fn
    return register


@builtin("+")
def add(*args):
    """Sum of all arguments (0 when called with none)."""
    return sum(args)


@builtin("-")
def sub(a, b=None):
    """Binary subtraction, or unary negation when b is omitted."""
    return -a if b is None else a - b


@builtin("*")
def mul(*args):
    """Product of all arguments (1 when called with none)."""
    product = 1
    for factor in args:
        product *= factor
    return product


@builtin("/")
def div(a, b):
    """True division."""
    return a / b


@builtin("mod")
def mod(a, b):
    """Python modulo (result sign follows the divisor)."""
    return a % b


@builtin("sqrt")
def sqrt(x):
    """Square root via exponentiation."""
    return x ** 0.5


@builtin("pow")
def power(x, n):
    """x raised to the n-th power."""
    return x ** n


@builtin("abs")
def absolute(x):
    """Absolute value."""
    return abs(x)


@builtin("floor")
def floor_fn(x):
    """Largest integer <= x."""
    import math
    return math.floor(x)


@builtin("ceil")
def ceil_fn(x):
    """Smallest integer >= x."""
    import math
    return math.ceil(x)


@builtin("round")
def round_fn(x, ndigits=0):
    """Round x to ndigits decimal places (Python banker's rounding)."""
    return round(x, int(ndigits))


@builtin("min")
def min_fn(*args):
    """Minimum of the arguments; a single list/tuple argument is unwrapped."""
    values = args[0] if len(args) == 1 and isinstance(args[0], (list, tuple)) else args
    return min(values)


@builtin("max")
def max_fn(*args):
    """Maximum of the arguments; a single list/tuple argument is unwrapped."""
    values = args[0] if len(args) == 1 and isinstance(args[0], (list, tuple)) else args
    return max(values)


@builtin("clamp")
def clamp(x, lo, hi):
    """Clamp x into the inclusive range [lo, hi]."""
    return max(lo, min(hi, x))
@builtin("=")
def eq(a, b):
return a == b
@builtin("<")
def lt(a, b):
return a < b
@builtin(">")
def gt(a, b):
return a > b
@builtin("<=")
def lte(a, b):
return a <= b
@builtin(">=")
def gte(a, b):
return a >= b
@builtin("odd?")
def is_odd(n):
return n % 2 == 1
@builtin("even?")
def is_even(n):
return n % 2 == 0
@builtin("zero?")
def is_zero(n):
return n == 0
@builtin("nil?")
def is_nil(x):
return x is None
@builtin("not")
def not_fn(x):
return not x
@builtin("inc")
def inc(n):
return n + 1
@builtin("dec")
def dec(n):
return n - 1
@builtin("list")
def make_list(*args):
return list(args)
@builtin("assert")
def assert_true(condition, message="Assertion failed"):
if not condition:
raise RuntimeError(f"Assertion error: {message}")
return True
@builtin("get")
def get(coll, key, default=None):
if isinstance(coll, dict):
# Try the key directly first
result = coll.get(key, None)
if result is not None:
return result
# If key is a Keyword, also try its string name (for Python dicts with string keys)
if isinstance(key, Keyword):
result = coll.get(key.name, None)
if result is not None:
return result
# Return the default
return default
elif isinstance(coll, list):
return coll[key] if 0 <= key < len(coll) else default
else:
raise EvalError(f"get: expected dict or list, got {type(coll).__name__}: {str(coll)[:100]}")
@builtin("dict?")
def is_dict(x):
return isinstance(x, dict)
@builtin("list?")
def is_list(x):
return isinstance(x, list)
@builtin("nil?")
def is_nil(x):
return x is None
@builtin("number?")
def is_number(x):
return isinstance(x, (int, float))
@builtin("string?")
def is_string(x):
return isinstance(x, str)
@builtin("len")
def length(coll):
return len(coll)
@builtin("first")
def first(coll):
return coll[0] if coll else None
@builtin("last")
def last(coll):
return coll[-1] if coll else None
@builtin("chunk-every")
def chunk_every(coll, n):
"""Split collection into chunks of n elements."""
n = int(n)
return [coll[i:i+n] for i in range(0, len(coll), n)]
@builtin("rest")
def rest(coll):
return coll[1:] if coll else []
@builtin("nth")
def nth(coll, n):
return coll[n] if 0 <= n < len(coll) else None
@builtin("concat")
def concat(*colls):
"""Concatenate multiple lists/sequences."""
result = []
for c in colls:
if c is not None:
result.extend(c)
return result
@builtin("cons")
def cons(x, coll):
"""Prepend x to collection."""
return [x] + list(coll) if coll else [x]
@builtin("append")
def append(coll, x):
"""Append x to collection."""
return list(coll) + [x] if coll else [x]
@builtin("range")
def make_range(start, end, step=1):
"""Create a range of numbers."""
return list(range(int(start), int(end), int(step)))
@builtin("zip-pairs")
def zip_pairs(coll):
"""Zip consecutive pairs: [a,b,c,d] -> [[a,b],[b,c],[c,d]]."""
if not coll or len(coll) < 2:
return []
return [[coll[i], coll[i+1]] for i in range(len(coll)-1)]
@builtin("dict")
def make_dict(*pairs):
"""Create dict from key-value pairs: (dict :a 1 :b 2)."""
result = {}
i = 0
while i < len(pairs) - 1:
key = pairs[i]
if isinstance(key, Keyword):
key = key.name
result[key] = pairs[i + 1]
i += 2
return result
@builtin("keys")
def keys(d):
"""Get the keys of a dict as a list."""
if not isinstance(d, dict):
raise EvalError(f"keys: expected dict, got {type(d).__name__}")
return list(d.keys())
@builtin("vals")
def vals(d):
"""Get the values of a dict as a list."""
if not isinstance(d, dict):
raise EvalError(f"vals: expected dict, got {type(d).__name__}")
return list(d.values())
@builtin("merge")
def merge(*dicts):
"""Merge multiple dicts, later dicts override earlier."""
result = {}
for d in dicts:
if d is not None:
if not isinstance(d, dict):
raise EvalError(f"merge: expected dict, got {type(d).__name__}")
result.update(d)
return result
@builtin("assoc")
def assoc(d, *pairs):
"""Associate keys with values in a dict: (assoc d :a 1 :b 2)."""
if d is None:
result = {}
elif isinstance(d, dict):
result = dict(d)
else:
raise EvalError(f"assoc: expected dict or nil, got {type(d).__name__}")
i = 0
while i < len(pairs) - 1:
key = pairs[i]
if isinstance(key, Keyword):
key = key.name
result[key] = pairs[i + 1]
i += 2
return result
@builtin("dissoc")
def dissoc(d, *keys_to_remove):
"""Remove keys from a dict: (dissoc d :a :b)."""
if d is None:
return {}
if not isinstance(d, dict):
raise EvalError(f"dissoc: expected dict or nil, got {type(d).__name__}")
result = dict(d)
for key in keys_to_remove:
if isinstance(key, Keyword):
key = key.name
result.pop(key, None)
return result
@builtin("into")
def into(target, coll):
"""Convert a collection into another collection type.
(into [] {:a 1 :b 2}) -> [["a" 1] ["b" 2]]
(into {} [[:a 1] [:b 2]]) -> {"a": 1, "b": 2}
(into [] [1 2 3]) -> [1 2 3]
"""
if isinstance(target, list):
if isinstance(coll, dict):
return [[k, v] for k, v in coll.items()]
elif isinstance(coll, (list, tuple)):
return list(coll)
else:
raise EvalError(f"into: cannot convert {type(coll).__name__} into list")
elif isinstance(target, dict):
if isinstance(coll, dict):
return dict(coll)
elif isinstance(coll, (list, tuple)):
result = {}
for item in coll:
if isinstance(item, (list, tuple)) and len(item) >= 2:
key = item[0]
if isinstance(key, Keyword):
key = key.name
result[key] = item[1]
else:
raise EvalError(f"into: expected [key value] pairs, got {item}")
return result
else:
raise EvalError(f"into: cannot convert {type(coll).__name__} into dict")
else:
raise EvalError(f"into: unsupported target type {type(target).__name__}")
@builtin("filter")
def filter_fn(pred, coll):
"""Filter collection by predicate. Pred must be a lambda."""
if not isinstance(pred, Lambda):
raise EvalError(f"filter: expected lambda as predicate, got {type(pred).__name__}")
result = []
for item in coll:
# Evaluate predicate with item
local_env = {}
if pred.closure:
local_env.update(pred.closure)
local_env[pred.params[0]] = item
# Inline evaluation of pred.body
from . import evaluator
if evaluator.evaluate(pred.body, local_env):
result.append(item)
return result
@builtin("some")
def some(pred, coll):
"""Return first truthy value of (pred item) for items in coll, or nil."""
if not isinstance(pred, Lambda):
raise EvalError(f"some: expected lambda as predicate, got {type(pred).__name__}")
for item in coll:
local_env = {}
if pred.closure:
local_env.update(pred.closure)
local_env[pred.params[0]] = item
from . import evaluator
result = evaluator.evaluate(pred.body, local_env)
if result:
return result
return None
@builtin("every?")
def every(pred, coll):
"""Return true if (pred item) is truthy for all items in coll."""
if not isinstance(pred, Lambda):
raise EvalError(f"every?: expected lambda as predicate, got {type(pred).__name__}")
for item in coll:
local_env = {}
if pred.closure:
local_env.update(pred.closure)
local_env[pred.params[0]] = item
from . import evaluator
if not evaluator.evaluate(pred.body, local_env):
return False
return True
@builtin("empty?")
def is_empty(coll):
"""Return true if collection is empty."""
if coll is None:
return True
return len(coll) == 0
@builtin("contains?")
def contains(coll, key):
"""Check if collection contains key (for dicts) or element (for lists)."""
if isinstance(coll, dict):
if isinstance(key, Keyword):
key = key.name
return key in coll
elif isinstance(coll, (list, tuple)):
return key in coll
return False
def evaluate(expr: Any, env: Dict[str, Any] = None) -> Any:
    """
    Evaluate an S-expression in the given environment.
    Args:
        expr: The expression to evaluate
        env: Variable bindings (name -> value)
    Returns:
        The result of evaluation
    Raises:
        EvalError: for undefined symbols, malformed special forms,
            non-callable heads, or unevaluable expressions.
    """
    if env is None:
        env = {}
    # Literals
    if isinstance(expr, (int, float, str, bool)) or expr is None:
        return expr
    # Symbol - variable lookup: env shadows BUILTINS; true/false/nil are
    # resolved last, so they can be rebound by env.
    if isinstance(expr, Symbol):
        name = expr.name
        if name in env:
            return env[name]
        if name in BUILTINS:
            return BUILTINS[name]
        if name == "true":
            return True
        if name == "false":
            return False
        if name == "nil":
            return None
        raise EvalError(f"Undefined symbol: {name}")
    # Keyword - return as-is (used as map keys); evaluates to its string name
    if isinstance(expr, Keyword):
        return expr.name
    # Lambda - return as-is (it's a value)
    if isinstance(expr, Lambda):
        return expr
    # Binding - return as-is (resolved at execution time)
    if isinstance(expr, Binding):
        return expr
    # Dict literal - values are evaluated, keys are kept as-is
    if isinstance(expr, dict):
        return {k: evaluate(v, env) for k, v in expr.items()}
    # List - function call or special form
    if isinstance(expr, list):
        if not expr:
            return []
        head = expr[0]
        # If head is a string/number/etc (not Symbol), treat as data list
        if not isinstance(head, (Symbol, Lambda, list)):
            return [evaluate(x, env) for x in expr]
        # Special forms
        if isinstance(head, Symbol):
            name = head.name
            # if - conditional
            if name == "if":
                if len(expr) < 3:
                    raise EvalError("if requires condition and then-branch")
                cond_result = evaluate(expr[1], env)
                if cond_result:
                    return evaluate(expr[2], env)
                elif len(expr) > 3:
                    return evaluate(expr[3], env)
                # No else-branch supplied
                return None
            # cond - multi-way conditional
            # Supports both Clojure style: (cond test1 result1 test2 result2 :else default)
            # and Scheme style: (cond (test1 result1) (test2 result2) (else default))
            if name == "cond":
                clauses = expr[1:]
                # Check if Clojure style (flat list) or Scheme style (nested pairs)
                # Scheme style: first clause is (test result) - exactly 2 elements
                # Clojure style: first clause is a test expression like (= x 0) - 3+ elements
                first_is_scheme_clause = (
                    clauses and
                    isinstance(clauses[0], list) and
                    len(clauses[0]) == 2 and
                    not (isinstance(clauses[0][0], Symbol) and clauses[0][0].name in ('=', '<', '>', '<=', '>=', '!=', 'not=', 'and', 'or'))
                )
                if first_is_scheme_clause:
                    # Scheme style: ((test result) ...)
                    for clause in clauses:
                        if not isinstance(clause, list) or len(clause) < 2:
                            raise EvalError("cond clause must be (test result)")
                        test = clause[0]
                        # Check for else/default
                        if isinstance(test, Symbol) and test.name in ("else", ":else"):
                            return evaluate(clause[1], env)
                        if isinstance(test, Keyword) and test.name == "else":
                            return evaluate(clause[1], env)
                        if evaluate(test, env):
                            return evaluate(clause[1], env)
                else:
                    # Clojure style: test1 result1 test2 result2 ...
                    i = 0
                    while i < len(clauses) - 1:
                        test = clauses[i]
                        result = clauses[i + 1]
                        # Check for :else
                        if isinstance(test, Keyword) and test.name == "else":
                            return evaluate(result, env)
                        if isinstance(test, Symbol) and test.name == ":else":
                            return evaluate(result, env)
                        if evaluate(test, env):
                            return evaluate(result, env)
                        i += 2
                # No clause matched in either style
                return None
            # case - switch on value
            # (case expr val1 result1 val2 result2 :else default)
            if name == "case":
                if len(expr) < 2:
                    raise EvalError("case requires expression to match")
                match_val = evaluate(expr[1], env)
                clauses = expr[2:]
                i = 0
                while i < len(clauses) - 1:
                    test = clauses[i]
                    result = clauses[i + 1]
                    # Check for :else / else
                    if isinstance(test, Keyword) and test.name == "else":
                        return evaluate(result, env)
                    if isinstance(test, Symbol) and test.name in (":else", "else"):
                        return evaluate(result, env)
                    # Evaluate test value and compare
                    test_val = evaluate(test, env)
                    if match_val == test_val:
                        return evaluate(result, env)
                    i += 2
                return None
            # and - short-circuit; returns the first falsy value or the last value
            if name == "and":
                result = True
                for arg in expr[1:]:
                    result = evaluate(arg, env)
                    if not result:
                        return result
                return result
            # or - short-circuit; returns the first truthy value or the last value
            if name == "or":
                result = False
                for arg in expr[1:]:
                    result = evaluate(arg, env)
                    if result:
                        return result
                return result
            # let and let* - local bindings (both bind sequentially in this impl)
            if name in ("let", "let*"):
                if len(expr) < 3:
                    raise EvalError(f"{name} requires bindings and body")
                bindings = expr[1]
                local_env = dict(env)
                if isinstance(bindings, list):
                    # Check if it's ((name value) ...) style (Lisp let* style)
                    if bindings and isinstance(bindings[0], list):
                        for binding in bindings:
                            if len(binding) != 2:
                                raise EvalError(f"{name} binding must be (name value)")
                            var_name = binding[0]
                            if isinstance(var_name, Symbol):
                                var_name = var_name.name
                            # Each value sees earlier bindings (sequential).
                            value = evaluate(binding[1], local_env)
                            local_env[var_name] = value
                    # Vector-style [name value ...]
                    elif len(bindings) % 2 == 0:
                        for i in range(0, len(bindings), 2):
                            var_name = bindings[i]
                            if isinstance(var_name, Symbol):
                                var_name = var_name.name
                            value = evaluate(bindings[i + 1], local_env)
                            local_env[var_name] = value
                    else:
                        raise EvalError(f"{name} bindings must be [name value ...] or ((name value) ...)")
                else:
                    raise EvalError(f"{name} bindings must be a list")
                # Only a single body expression is evaluated.
                return evaluate(expr[2], local_env)
            # lambda / fn - create function with closure
            if name in ("lambda", "fn"):
                if len(expr) < 3:
                    raise EvalError("lambda requires params and body")
                params = expr[1]
                if not isinstance(params, list):
                    raise EvalError("lambda params must be a list")
                param_names = []
                for p in params:
                    if isinstance(p, Symbol):
                        param_names.append(p.name)
                    elif isinstance(p, str):
                        param_names.append(p)
                    else:
                        raise EvalError(f"Invalid param: {p}")
                # Capture current environment as closure
                return Lambda(param_names, expr[2], dict(env))
            # quote - return unevaluated
            if name == "quote":
                return expr[1] if len(expr) > 1 else None
            # bind - create binding to analysis data
            # (bind analysis-var)
            # (bind analysis-var :range [0.3 1.0])
            # (bind analysis-var :range [0 100] :transform sqrt)
            if name == "bind":
                if len(expr) < 2:
                    raise EvalError("bind requires analysis reference")
                analysis_ref = expr[1]
                if isinstance(analysis_ref, Symbol):
                    symbol_name = analysis_ref.name
                    # Look up the symbol in environment
                    if symbol_name in env:
                        resolved = env[symbol_name]
                        # If resolved is actual analysis data (dict with times/values or
                        # S-expression list with Keywords), keep the symbol name as reference
                        # for later lookup at execution time
                        if isinstance(resolved, dict) and ("times" in resolved or "values" in resolved):
                            analysis_ref = symbol_name  # Use name as reference, not the data
                        elif isinstance(resolved, list) and any(isinstance(x, Keyword) for x in resolved):
                            # Parsed S-expression analysis data ([:times [...] :duration ...])
                            analysis_ref = symbol_name
                        else:
                            analysis_ref = resolved
                    else:
                        raise EvalError(f"bind: undefined symbol '{symbol_name}' - must reference analysis data")
                # Parse optional :range [min max] and :transform
                range_min, range_max = 0.0, 1.0
                transform = None
                i = 2
                while i < len(expr):
                    if isinstance(expr[i], Keyword):
                        kw = expr[i].name
                        if kw == "range" and i + 1 < len(expr):
                            range_val = evaluate(expr[i + 1], env)  # Evaluate to get actual value
                            if isinstance(range_val, list) and len(range_val) >= 2:
                                range_min = float(range_val[0])
                                range_max = float(range_val[1])
                            i += 2
                        elif kw == "transform" and i + 1 < len(expr):
                            t = expr[i + 1]
                            if isinstance(t, Symbol):
                                transform = t.name
                            elif isinstance(t, str):
                                transform = t
                            i += 2
                        else:
                            # Unrecognized keyword: step past it one item at a time.
                            i += 1
                    else:
                        i += 1
                return Binding(analysis_ref, range_min=range_min, range_max=range_max, transform=transform)
            # Vector literal [a b c]
            if name == "vec" or name == "vector":
                return [evaluate(e, env) for e in expr[1:]]
            # map - (map fn coll)
            if name == "map":
                if len(expr) != 3:
                    raise EvalError("map requires fn and collection")
                fn = evaluate(expr[1], env)
                coll = evaluate(expr[2], env)
                if not isinstance(fn, Lambda):
                    raise EvalError(f"map requires lambda, got {type(fn)}")
                result = []
                for item in coll:
                    # Body env: closure, then the caller's env, then the parameter.
                    local_env = {}
                    if fn.closure:
                        local_env.update(fn.closure)
                    local_env.update(env)
                    local_env[fn.params[0]] = item
                    result.append(evaluate(fn.body, local_env))
                return result
            # map-indexed - (map-indexed fn coll)
            if name == "map-indexed":
                if len(expr) != 3:
                    raise EvalError("map-indexed requires fn and collection")
                fn = evaluate(expr[1], env)
                coll = evaluate(expr[2], env)
                if not isinstance(fn, Lambda):
                    raise EvalError(f"map-indexed requires lambda, got {type(fn)}")
                if len(fn.params) < 2:
                    raise EvalError("map-indexed lambda needs (i item) params")
                result = []
                for i, item in enumerate(coll):
                    local_env = {}
                    if fn.closure:
                        local_env.update(fn.closure)
                    local_env.update(env)
                    local_env[fn.params[0]] = i
                    local_env[fn.params[1]] = item
                    result.append(evaluate(fn.body, local_env))
                return result
            # reduce - (reduce fn init coll)
            if name == "reduce":
                if len(expr) != 4:
                    raise EvalError("reduce requires fn, init, and collection")
                fn = evaluate(expr[1], env)
                acc = evaluate(expr[2], env)
                coll = evaluate(expr[3], env)
                if not isinstance(fn, Lambda):
                    raise EvalError(f"reduce requires lambda, got {type(fn)}")
                if len(fn.params) < 2:
                    raise EvalError("reduce lambda needs (acc item) params")
                for item in coll:
                    local_env = {}
                    if fn.closure:
                        local_env.update(fn.closure)
                    local_env.update(env)
                    local_env[fn.params[0]] = acc
                    local_env[fn.params[1]] = item
                    acc = evaluate(fn.body, local_env)
                return acc
            # for-each - (for-each fn coll) - iterate with side effects
            if name == "for-each":
                if len(expr) != 3:
                    raise EvalError("for-each requires fn and collection")
                fn = evaluate(expr[1], env)
                coll = evaluate(expr[2], env)
                if not isinstance(fn, Lambda):
                    raise EvalError(f"for-each requires lambda, got {type(fn)}")
                for item in coll:
                    local_env = {}
                    if fn.closure:
                        local_env.update(fn.closure)
                    local_env.update(env)
                    local_env[fn.params[0]] = item
                    evaluate(fn.body, local_env)
                return None
        # Function call (head may be a Symbol, Lambda, or a list that
        # evaluates to a callable/Lambda; special forms were handled above)
        fn = evaluate(head, env)
        args = [evaluate(arg, env) for arg in expr[1:]]
        # Call builtin
        if callable(fn):
            return fn(*args)
        # Call lambda
        if isinstance(fn, Lambda):
            if len(args) != len(fn.params):
                raise EvalError(f"Lambda expects {len(fn.params)} args, got {len(args)}")
            # Start with closure (captured env), then overlay calling env, then params
            local_env = {}
            if fn.closure:
                local_env.update(fn.closure)
            local_env.update(env)
            for name, value in zip(fn.params, args):
                local_env[name] = value
            return evaluate(fn.body, local_env)
        raise EvalError(f"Not callable: {fn}")
    raise EvalError(f"Cannot evaluate: {expr!r}")
def make_env(**kwargs) -> Dict[str, Any]:
    """Create an evaluation environment from keyword bindings."""
    env: Dict[str, Any] = {}
    env.update(kwargs)
    return env

View File

@@ -0,0 +1,292 @@
"""
External tool runners for effects that can't be done in FFmpeg.
Supports:
- datamosh: via ffglitch or datamoshing Python CLI
- pixelsort: via Rust pixelsort or Python pixelsort-cli
"""
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
def find_tool(tool_names: List[str]) -> Optional[str]:
    """Return the resolved path of the first candidate found on PATH, else None."""
    return next(
        (located for located in map(shutil.which, tool_names) if located),
        None,
    )
def check_python_package(package: str) -> bool:
    """Return True when `python3 -c "import <package>"` exits cleanly."""
    probe = ["python3", "-c", f"import {package}"]
    try:
        completed = subprocess.run(
            probe,
            capture_output=True,
            timeout=5,
        )
    except Exception:
        # python3 missing from PATH, timeout, etc. -- treat as "not installed".
        return False
    return completed.returncode == 0
# Tool detection: candidate executable names probed on PATH, in priority order.
DATAMOSH_TOOLS = ["ffgac", "ffedit"] # ffglitch tools
PIXELSORT_TOOLS = ["pixelsort"] # Rust CLI
def get_available_tools() -> Dict[str, Optional[str]]:
    """Map each supported effect backend to its detected tool, or None."""
    available: Dict[str, Optional[str]] = {
        "datamosh": find_tool(DATAMOSH_TOOLS),
        "pixelsort": find_tool(PIXELSORT_TOOLS),
    }
    available["datamosh_python"] = "datamoshing" if check_python_package("datamoshing") else None
    available["pixelsort_python"] = "pixelsort" if check_python_package("pixelsort") else None
    return available
def run_datamosh(
    input_path: Path,
    output_path: Path,
    params: Dict[str, Any],
) -> Tuple[bool, str]:
    """
    Run datamosh effect using the best available tool.

    Tries ffglitch (ffgac) with a generated motion-vector corruption script
    first, then falls back to the Python `datamoshing` package.

    Args:
        input_path: Input video file
        output_path: Output video file
        params: Effect parameters (corruption, block_size, etc.)
    Returns:
        (success, error_message)
    """
    tools = get_available_tools()
    corruption = params.get("corruption", 0.3)
    # Try ffglitch first
    if tools.get("datamosh"):
        ffgac = tools["datamosh"]
        script_path = None
        try:
            # ffglitch approach: remove I-frames to create datamosh effect
            # This is a simplified version - full datamosh needs custom scripts
            with tempfile.NamedTemporaryFile(mode='w', suffix='.js', delete=False) as f:
                # Write a simple ffglitch script that corrupts motion vectors
                f.write(f"""
// Datamosh script - corrupt motion vectors
let corruption = {corruption};
export function glitch_frame(frame, stream) {{
    if (frame.pict_type === 'P' && Math.random() < corruption) {{
        // Corrupt motion vectors
        let dominated = frame.mv?.forward?.overflow;
        if (dominated) {{
            for (let i = 0; i < dominated.length; i++) {{
                if (Math.random() < corruption) {{
                    dominated[i] = [
                        Math.floor(Math.random() * 64 - 32),
                        Math.floor(Math.random() * 64 - 32)
                    ];
                }}
            }}
        }}
    }}
    return frame;
}}
""")
                script_path = f.name
            cmd = [
                ffgac,
                "-i", str(input_path),
                "-s", script_path,
                "-o", str(output_path),
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
            if result.returncode == 0:
                return True, ""
            return False, result.stderr[:500]
        except subprocess.TimeoutExpired:
            return False, "Datamosh timeout"
        except Exception as e:
            return False, str(e)
        finally:
            # BUG FIX: the temp script previously leaked when subprocess.run
            # raised (timeout or other error); always clean it up.
            if script_path:
                Path(script_path).unlink(missing_ok=True)
    # Fall back to Python datamoshing package
    if tools.get("datamosh_python"):
        try:
            cmd = [
                "python3", "-m", "datamoshing",
                str(input_path),
                str(output_path),
                "--mode", "iframe_removal",
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
            if result.returncode == 0:
                return True, ""
            return False, result.stderr[:500]
        except Exception as e:
            return False, str(e)
    return False, "No datamosh tool available. Install ffglitch or: pip install datamoshing"
def run_pixelsort(
    input_path: Path,
    output_path: Path,
    params: Dict[str, Any],
) -> Tuple[bool, str]:
    """
    Run pixelsort effect using available tool.

    The Rust CLI is preferred (faster); the Python pixelsort-cli package
    is used as a fallback.

    Args:
        input_path: Input image/frame file
        output_path: Output image file
        params: Effect parameters (sort_by, threshold_low, threshold_high, angle)

    Returns:
        (success, error_message)
    """
    tools = get_available_tools()
    sort_by = params.get("sort_by", "lightness")
    threshold_low = params.get("threshold_low", 50)
    # Read for parity with the param schema; neither CLI invocation below
    # forwards threshold_high.
    threshold_high = params.get("threshold_high", 200)
    angle = params.get("angle", 0)

    def _invoke(cmd: List[str]) -> Tuple[bool, str]:
        # Shared runner: success on exit code 0, otherwise truncated stderr.
        completed = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
        if completed.returncode == 0:
            return True, ""
        return False, completed.stderr[:500]

    # Try Rust pixelsort first (faster)
    rust_cli = tools.get("pixelsort")
    if rust_cli:
        try:
            return _invoke([
                rust_cli,
                str(input_path),
                "-o", str(output_path),
                "--sort", sort_by,
                "-r", str(angle),
            ])
        except Exception as e:
            return False, str(e)

    # Fall back to Python pixelsort-cli
    if tools.get("pixelsort_python"):
        try:
            return _invoke([
                "python3", "-m", "pixelsort",
                "--image_path", str(input_path),
                "--output", str(output_path),
                "--angle", str(angle),
                "--threshold", str(threshold_low / 255),  # Normalize to 0-1
            ])
        except Exception as e:
            return False, str(e)

    return False, "No pixelsort tool available. Install: cargo install pixelsort or pip install pixelsort-cli"
def run_pixelsort_video(
    input_path: Path,
    output_path: Path,
    params: Dict[str, Any],
    fps: float = 30,
) -> Tuple[bool, str]:
    """
    Run pixelsort on a video by processing each frame.

    Extracts frames with ffmpeg, pixel-sorts each one via run_pixelsort,
    then reassembles the video, copying the original audio when present.

    Args:
        input_path: Input video file
        output_path: Output video file
        params: Pixelsort parameters (forwarded to run_pixelsort)
        fps: Frame rate used for both extraction and reassembly

    Returns:
        (success, error_message)
    """
    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)  # distinct name: avoid shadowing the tmpdir string

        # Extract frames (ffmpeg numbers them frame_0001.png, frame_0002.png, ...)
        extract_cmd = [
            "ffmpeg", "-y",
            "-i", str(input_path),
            "-vf", f"fps={fps}",
            str(workdir / "frame_%04d.png"),
        ]
        result = subprocess.run(extract_cmd, capture_output=True, timeout=300)
        if result.returncode != 0:
            return False, "Failed to extract frames"

        # Process each frame
        for i, frame in enumerate(sorted(workdir.glob("frame_*.png"))):
            success, error = run_pixelsort(frame, workdir / f"sorted_{i:04d}.png", params)
            if not success:
                return False, f"Frame {i}: {error}"

        # Reassemble; "-map 1:a?" copies audio from the original if any exists
        reassemble_cmd = [
            "ffmpeg", "-y",
            "-framerate", str(fps),
            "-i", str(workdir / "sorted_%04d.png"),
            "-i", str(input_path),
            "-map", "0:v", "-map", "1:a?",
            "-c:v", "libx264", "-preset", "fast",
            "-c:a", "copy",
            str(output_path),
        ]
        result = subprocess.run(reassemble_cmd, capture_output=True, timeout=300)
        if result.returncode != 0:
            return False, "Failed to reassemble video"

    return True, ""
def run_external_effect(
    effect_name: str,
    input_path: Path,
    output_path: Path,
    params: Dict[str, Any],
    is_video: bool = True,
) -> Tuple[bool, str]:
    """
    Run an external effect tool.

    Args:
        effect_name: Name of effect (datamosh, pixelsort)
        input_path: Input file
        output_path: Output file
        params: Effect parameters
        is_video: Whether input is video (vs single image)

    Returns:
        (success, error_message)
    """
    if effect_name == "datamosh":
        return run_datamosh(input_path, output_path, params)
    if effect_name == "pixelsort":
        # Video input needs the per-frame wrapper; a still goes straight through.
        runner = run_pixelsort_video if is_video else run_pixelsort
        return runner(input_path, output_path, params)
    return False, f"Unknown external effect: {effect_name}"
if __name__ == "__main__":
    # Print available tools
    # Diagnostic entry point: `python -m artdag.sexp.external_tools` lists
    # which effect backends were detected in this environment.
    print("Available external tools:")
    for name, path in get_available_tools().items():
        status = path if path else "NOT INSTALLED"
        print(f"  {name}: {status}")

View File

@@ -0,0 +1,616 @@
"""
FFmpeg filter compiler for sexp effects.
Compiles sexp effect definitions to FFmpeg filter expressions,
with support for dynamic parameters via sendcmd scripts.
Usage:
compiler = FFmpegCompiler()
# Compile an effect with static params
filter_str = compiler.compile_effect("brightness", {"amount": 50})
# -> "eq=brightness=0.196"
# Compile with dynamic binding to analysis data
filter_str, sendcmd = compiler.compile_effect_with_binding(
"brightness",
{"amount": {"_bind": "bass-data", "range_min": 0, "range_max": 100}},
analysis_data={"bass-data": {"times": [...], "values": [...]}},
segment_start=0.0,
segment_duration=5.0,
)
# -> ("eq=brightness=0.5", "0.0 [eq] brightness 0.5;\n0.05 [eq] brightness 0.6;...")
"""
import math
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
# FFmpeg filter mappings for common effects
# Maps effect name -> {filter: str, params: {param_name: {ffmpeg_param, scale, offset}}}
#
# Entry keys understood by FFmpegCompiler:
#   filter           - FFmpeg filter name; None means "not expressible as an FFmpeg filter"
#   static           - fixed argument string, emitted as "<filter>=<static>"
#   params           - effect param -> {ffmpeg_param, scale, offset} linear conversion
#   complex          - multi-stream filter chain (needs filter_complex-style handling)
#   time_based       - parameter becomes an FFmpeg expression in 't'
#   external_tool    - delegated to an external CLI (see external_tools module)
#   python_primitive - delegated to a Python frame-processing primitive
EFFECT_MAPPINGS = {
    "invert": {
        "filter": "negate",
        "params": {},
    },
    "grayscale": {
        "filter": "colorchannelmixer",
        "static": "0.3:0.4:0.3:0:0.3:0.4:0.3:0:0.3:0.4:0.3",
        "params": {},
    },
    "sepia": {
        "filter": "colorchannelmixer",
        "static": "0.393:0.769:0.189:0:0.349:0.686:0.168:0:0.272:0.534:0.131",
        "params": {},
    },
    "brightness": {
        "filter": "eq",
        "params": {
            # amount is 0-255-ish; eq's brightness expects roughly -1..1
            "amount": {"ffmpeg_param": "brightness", "scale": 1/255, "offset": 0},
        },
    },
    "contrast": {
        "filter": "eq",
        "params": {
            "amount": {"ffmpeg_param": "contrast", "scale": 1.0, "offset": 0},
        },
    },
    "saturation": {
        "filter": "eq",
        "params": {
            "amount": {"ffmpeg_param": "saturation", "scale": 1.0, "offset": 0},
        },
    },
    "hue_shift": {
        "filter": "hue",
        "params": {
            "degrees": {"ffmpeg_param": "h", "scale": 1.0, "offset": 0},
        },
    },
    "blur": {
        "filter": "gblur",
        "params": {
            "radius": {"ffmpeg_param": "sigma", "scale": 1.0, "offset": 0},
        },
    },
    "sharpen": {
        "filter": "unsharp",
        "params": {
            "amount": {"ffmpeg_param": "la", "scale": 1.0, "offset": 0},
        },
    },
    "pixelate": {
        # Scale down then up to create pixelation effect
        "filter": "scale",
        "static": "iw/8:ih/8:flags=neighbor,scale=iw*8:ih*8:flags=neighbor",
        "params": {},
    },
    "vignette": {
        "filter": "vignette",
        "params": {
            "strength": {"ffmpeg_param": "a", "scale": 1.0, "offset": 0},
        },
    },
    "noise": {
        "filter": "noise",
        "params": {
            "amount": {"ffmpeg_param": "alls", "scale": 1.0, "offset": 0},
        },
    },
    "flip": {
        "filter": "hflip",  # Default horizontal
        "params": {},
    },
    "mirror": {
        "filter": "hflip",
        "params": {},
    },
    "rotate": {
        "filter": "rotate",
        "params": {
            "angle": {"ffmpeg_param": "a", "scale": math.pi/180, "offset": 0},  # degrees to radians
        },
    },
    "zoom": {
        "filter": "zoompan",
        "params": {
            "factor": {"ffmpeg_param": "z", "scale": 1.0, "offset": 0},
        },
    },
    "posterize": {
        # Use lutyuv to quantize levels (approximate posterization)
        "filter": "lutyuv",
        "static": "y=floor(val/32)*32:u=floor(val/32)*32:v=floor(val/32)*32",
        "params": {},
    },
    "threshold": {
        # Use geq for thresholding
        "filter": "geq",
        "static": "lum='if(gt(lum(X,Y),128),255,0)':cb=128:cr=128",
        "params": {},
    },
    "edge_detect": {
        "filter": "edgedetect",
        "params": {
            "low": {"ffmpeg_param": "low", "scale": 1/255, "offset": 0},
            "high": {"ffmpeg_param": "high", "scale": 1/255, "offset": 0},
        },
    },
    "swirl": {
        "filter": "lenscorrection",  # Approximate with lens distortion
        "params": {
            "strength": {"ffmpeg_param": "k1", "scale": 0.1, "offset": 0},
        },
    },
    "fisheye": {
        "filter": "lenscorrection",
        "params": {
            "strength": {"ffmpeg_param": "k1", "scale": 1.0, "offset": 0},
        },
    },
    "wave": {
        # Wave displacement using geq - need r/g/b for RGB mode
        "filter": "geq",
        "static": "r='r(X+10*sin(Y/20),Y)':g='g(X+10*sin(Y/20),Y)':b='b(X+10*sin(Y/20),Y)'",
        "params": {},
    },
    "rgb_split": {
        # Chromatic aberration using geq
        "filter": "geq",
        "static": "r='p(X+5,Y)':g='p(X,Y)':b='p(X-5,Y)'",
        "params": {},
    },
    "scanlines": {
        "filter": "drawgrid",
        "params": {
            "spacing": {"ffmpeg_param": "h", "scale": 1.0, "offset": 0},
        },
    },
    "film_grain": {
        "filter": "noise",
        "params": {
            "intensity": {"ffmpeg_param": "alls", "scale": 100, "offset": 0},
        },
    },
    "crt": {
        "filter": "vignette",  # Simplified - just vignette for CRT look
        "params": {},
    },
    "bloom": {
        "filter": "gblur",  # Simplified bloom = blur overlay
        "params": {
            "radius": {"ffmpeg_param": "sigma", "scale": 1.0, "offset": 0},
        },
    },
    "color_cycle": {
        "filter": "hue",
        "params": {
            "speed": {"ffmpeg_param": "h", "scale": 360.0, "offset": 0, "time_expr": True},
        },
        "time_based": True,  # Uses time expression
    },
    "strobe": {
        # Strobe using select to drop frames
        "filter": "select",
        "static": "'mod(n,4)'",
        "params": {},
    },
    "echo": {
        # Echo using tmix
        "filter": "tmix",
        "static": "frames=4:weights='1 0.5 0.25 0.125'",
        "params": {},
    },
    "trails": {
        # Trails using tblend
        "filter": "tblend",
        "static": "all_mode=average",
        "params": {},
    },
    "kaleidoscope": {
        # 4-way mirror kaleidoscope using FFmpeg filter chain
        # Crops top-left quadrant, mirrors horizontally, then vertically
        "filter": "crop",
        "complex": True,
        "static": "iw/2:ih/2:0:0[q];[q]split[q1][q2];[q1]hflip[qr];[q2][qr]hstack[top];[top]split[t1][t2];[t2]vflip[bot];[t1][bot]vstack",
        "params": {},
    },
    "emboss": {
        "filter": "convolution",
        "static": "-2 -1 0 -1 1 1 0 1 2:-2 -1 0 -1 1 1 0 1 2:-2 -1 0 -1 1 1 0 1 2:-2 -1 0 -1 1 1 0 1 2",
        "params": {},
    },
    "neon_glow": {
        # Edge detect + negate for neon-like effect
        "filter": "edgedetect",
        "static": "mode=colormix:high=0.1",
        "params": {},
    },
    "ascii_art": {
        # Requires Python frame processing - no FFmpeg equivalent
        "filter": None,
        "python_primitive": "ascii_art_frame",
        "params": {
            "char_size": {"default": 8},
            "alphabet": {"default": "standard"},
            "color_mode": {"default": "color"},
        },
    },
    "ascii_zones": {
        # Requires Python frame processing - zone-based ASCII
        "filter": None,
        "python_primitive": "ascii_zones_frame",
        "params": {
            "char_size": {"default": 8},
            "zone_threshold": {"default": 128},
        },
    },
    "datamosh": {
        # External tool: ffglitch or datamoshing CLI, falls back to Python
        "filter": None,
        "external_tool": "datamosh",
        "python_primitive": "datamosh_frame",
        "params": {
            "block_size": {"default": 32},
            "corruption": {"default": 0.3},
        },
    },
    "pixelsort": {
        # External tool: pixelsort CLI (Rust or Python), falls back to Python
        "filter": None,
        "external_tool": "pixelsort",
        "python_primitive": "pixelsort_frame",
        "params": {
            "sort_by": {"default": "lightness"},
            "threshold_low": {"default": 50},
            "threshold_high": {"default": 200},
            "angle": {"default": 0},
        },
    },
    "ripple": {
        # Use geq for ripple displacement
        "filter": "geq",
        "static": "lum='lum(X+5*sin(hypot(X-W/2,Y-H/2)/10),Y+5*cos(hypot(X-W/2,Y-H/2)/10))'",
        "params": {},
    },
    "tile_grid": {
        # Use tile filter for grid
        "filter": "tile",
        "static": "2x2",
        "params": {},
    },
    "outline": {
        "filter": "edgedetect",
        "params": {},
    },
    "color-adjust": {
        "filter": "eq",
        "params": {
            "brightness": {"ffmpeg_param": "brightness", "scale": 1/255, "offset": 0},
            "contrast": {"ffmpeg_param": "contrast", "scale": 1.0, "offset": 0},
        },
    },
}
class FFmpegCompiler:
    """Compiles sexp effects to FFmpeg filters with sendcmd support.

    Lookup is driven by an effect-mapping table (EFFECT_MAPPINGS by
    default).  Each entry names the FFmpeg filter plus a linear
    (scale/offset) conversion from effect params to filter params; see
    the table's header comment for the full entry schema.
    """

    def __init__(self, effect_mappings: Dict = None):
        # Fall back to the module-level table when no custom table given.
        self.mappings = effect_mappings or EFFECT_MAPPINGS

    def get_full_mapping(self, effect_name: str) -> Optional[Dict]:
        """Get full mapping for an effect (including external tools and python primitives)."""
        mapping = self.mappings.get(effect_name)
        if not mapping:
            # Try with underscores/hyphens converted
            normalized = effect_name.replace("-", "_").replace(" ", "_").lower()
            mapping = self.mappings.get(normalized)
        return mapping

    def get_mapping(self, effect_name: str) -> Optional[Dict]:
        """Get FFmpeg filter mapping for an effect (returns None for non-FFmpeg effects)."""
        mapping = self.get_full_mapping(effect_name)
        # Return None if no mapping or no FFmpeg filter
        if mapping and mapping.get("filter") is None:
            return None
        return mapping

    def has_external_tool(self, effect_name: str) -> Optional[str]:
        """Check if effect uses an external tool. Returns tool name or None."""
        mapping = self.get_full_mapping(effect_name)
        if mapping:
            return mapping.get("external_tool")
        return None

    def has_python_primitive(self, effect_name: str) -> Optional[str]:
        """Check if effect uses a Python primitive. Returns primitive name or None."""
        mapping = self.get_full_mapping(effect_name)
        if mapping:
            return mapping.get("python_primitive")
        return None

    def is_complex_filter(self, effect_name: str) -> bool:
        """Check if effect uses a complex filter chain."""
        mapping = self.get_full_mapping(effect_name)
        return bool(mapping and mapping.get("complex"))

    def compile_effect(
        self,
        effect_name: str,
        params: Dict[str, Any],
    ) -> Optional[str]:
        """
        Compile an effect to an FFmpeg filter string with static params.

        Returns None if effect has no FFmpeg mapping.  Binding dicts
        (``{"_bind": ...}``) are skipped here; use
        compile_effect_with_bindings for those.
        """
        mapping = self.get_mapping(effect_name)
        if not mapping:
            return None

        filter_name = mapping["filter"]

        # Handle static filters (no params)
        if "static" in mapping:
            return f"{filter_name}={mapping['static']}"

        if not mapping.get("params"):
            return filter_name

        # Build param string
        filter_params = []
        for param_name, param_config in mapping["params"].items():
            if param_name in params:
                value = params[param_name]
                # Skip if it's a binding (handled separately)
                if isinstance(value, dict) and ("_bind" in value or "_binding" in value):
                    continue
                ffmpeg_param = param_config["ffmpeg_param"]
                scale = param_config.get("scale", 1.0)
                offset = param_config.get("offset", 0)
                # Handle various value types
                if isinstance(value, (int, float)):
                    ffmpeg_value = value * scale + offset
                    filter_params.append(f"{ffmpeg_param}={ffmpeg_value:.4f}")
                elif isinstance(value, str):
                    filter_params.append(f"{ffmpeg_param}={value}")
                elif isinstance(value, list) and value and isinstance(value[0], (int, float)):
                    # List value: use the first numeric element
                    ffmpeg_value = value[0] * scale + offset
                    filter_params.append(f"{ffmpeg_param}={ffmpeg_value:.4f}")

        if filter_params:
            return f"{filter_name}={':'.join(filter_params)}"
        return filter_name

    def compile_effect_with_bindings(
        self,
        effect_name: str,
        params: Dict[str, Any],
        analysis_data: Dict[str, Dict],
        segment_start: float,
        segment_duration: float,
        sample_interval: float = 0.04,  # ~25 fps
    ) -> Tuple[Optional[str], Optional[str], List[str]]:
        """
        Compile an effect with dynamic bindings to a filter + sendcmd script.

        Args:
            effect_name: Effect to compile
            params: Effect params; values may be binding dicts
                ({"_bind": name, "range_min": ..., "range_max": ..., "transform": ...})
            analysis_data: Analysis series keyed by name, each with
                parallel "times"/"values" lists
            segment_start: Segment start time in the source (seconds)
            segment_duration: Segment duration (seconds)
            sample_interval: Spacing of sendcmd samples (seconds)

        Returns:
            (filter_string, sendcmd_script, bound_param_names)
            - filter_string: Initial FFmpeg filter (may have placeholder values)
            - sendcmd_script: Script content for sendcmd filter (None if no bindings)
            - bound_param_names: List of params that have bindings
        """
        mapping = self.get_mapping(effect_name)
        if not mapping:
            return None, None, []

        filter_name = mapping["filter"]
        static_params = []
        bound_params = []
        sendcmd_lines = []

        # Handle time-based effects (use FFmpeg expressions with 't')
        if mapping.get("time_based"):
            for param_name, param_config in mapping.get("params", {}).items():
                if param_name in params:
                    value = params[param_name]
                    ffmpeg_param = param_config["ffmpeg_param"]
                    scale = param_config.get("scale", 1.0)
                    if isinstance(value, (int, float)):
                        # Create time expression: h='t*speed*scale'
                        static_params.append(f"{ffmpeg_param}='t*{value}*{scale}'")
                    else:
                        static_params.append(f"{ffmpeg_param}='t*{scale}'")
            if static_params:
                filter_str = f"{filter_name}={':'.join(static_params)}"
            else:
                filter_str = f"{filter_name}=h='t*360'"  # Default rotation
            return filter_str, None, []

        # Process each param
        for param_name, param_config in mapping.get("params", {}).items():
            if param_name not in params:
                continue

            value = params[param_name]
            ffmpeg_param = param_config["ffmpeg_param"]
            scale = param_config.get("scale", 1.0)
            offset = param_config.get("offset", 0)

            # Check if it's a binding
            if isinstance(value, dict) and ("_bind" in value or "_binding" in value):
                bind_ref = value.get("_bind") or value.get("_binding")
                range_min = value.get("range_min", 0.0)
                range_max = value.get("range_max", 1.0)
                transform = value.get("transform")

                # Get analysis data
                analysis = analysis_data.get(bind_ref)
                if not analysis:
                    # Try without -data suffix
                    analysis = analysis_data.get(bind_ref.replace("-data", ""))

                if analysis and "times" in analysis and "values" in analysis:
                    times = analysis["times"]
                    values = analysis["values"]

                    # Generate sendcmd entries for this segment
                    # (times in the script are relative to segment start).
                    t = 0.0
                    while t < segment_duration:
                        abs_time = segment_start + t
                        # Find analysis value at this time
                        raw_value = self._interpolate_value(times, values, abs_time)
                        # Apply transform
                        if transform == "sqrt":
                            raw_value = math.sqrt(max(0, raw_value))
                        elif transform == "pow2":
                            raw_value = raw_value ** 2
                        elif transform == "log":
                            raw_value = math.log(max(0.001, raw_value))
                        # Map to range, then apply FFmpeg scaling
                        mapped_value = range_min + raw_value * (range_max - range_min)
                        ffmpeg_value = mapped_value * scale + offset
                        sendcmd_lines.append(f"{t:.3f} [{filter_name}] {ffmpeg_param} {ffmpeg_value:.4f};")
                        t += sample_interval

                    bound_params.append(param_name)

                    # Use initial value for the filter string
                    initial_value = self._interpolate_value(times, values, segment_start)
                    initial_mapped = range_min + initial_value * (range_max - range_min)
                    initial_ffmpeg = initial_mapped * scale + offset
                    static_params.append(f"{ffmpeg_param}={initial_ffmpeg:.4f}")
                else:
                    # No analysis data, use range midpoint
                    mid_value = (range_min + range_max) / 2
                    ffmpeg_value = mid_value * scale + offset
                    static_params.append(f"{ffmpeg_param}={ffmpeg_value:.4f}")
            else:
                # Static value - handle various types
                if isinstance(value, (int, float)):
                    ffmpeg_value = value * scale + offset
                    static_params.append(f"{ffmpeg_param}={ffmpeg_value:.4f}")
                elif isinstance(value, str):
                    # String value - use as-is (e.g., for direction parameters)
                    static_params.append(f"{ffmpeg_param}={value}")
                elif isinstance(value, list) and value:
                    # List - try to use first numeric element
                    first = value[0]
                    if isinstance(first, (int, float)):
                        ffmpeg_value = first * scale + offset
                        static_params.append(f"{ffmpeg_param}={ffmpeg_value:.4f}")
                # Skip other types

        # Handle static filters
        if "static" in mapping:
            filter_str = f"{filter_name}={mapping['static']}"
        elif static_params:
            filter_str = f"{filter_name}={':'.join(static_params)}"
        else:
            filter_str = filter_name

        # Combine sendcmd lines
        sendcmd_script = "\n".join(sendcmd_lines) if sendcmd_lines else None

        return filter_str, sendcmd_script, bound_params

    def _interpolate_value(
        self,
        times: List[float],
        values: List[float],
        target_time: float,
    ) -> float:
        """Interpolate a value from analysis data at a given time.

        Clamps to the first/last sample outside the covered range and
        linearly interpolates between the two surrounding samples inside it.
        Returns 0.5 when the series is empty.
        """
        if not times or not values:
            return 0.5

        # Clamp outside the sampled range
        if target_time <= times[0]:
            return values[0]
        if target_time >= times[-1]:
            return values[-1]

        # Binary search for the surrounding pair of samples
        lo, hi = 0, len(times) - 1
        while lo < hi - 1:
            mid = (lo + hi) // 2
            if times[mid] <= target_time:
                lo = mid
            else:
                hi = mid

        # Linear interpolation
        t0, t1 = times[lo], times[hi]
        v0, v1 = values[lo], values[hi]
        if t1 == t0:
            return v0
        alpha = (target_time - t0) / (t1 - t0)
        return v0 + alpha * (v1 - v0)
def generate_sendcmd_filter(
    effects: List[Dict],
    analysis_data: Dict[str, Dict],
    segment_start: float,
    segment_duration: float,
) -> Tuple[str, Optional[Path]]:
    """
    Generate FFmpeg filter chain with sendcmd for dynamic effects.

    Args:
        effects: List of effect configs with name and params
        analysis_data: Analysis data keyed by name
        segment_start: Segment start time in source
        segment_duration: Segment duration

    Returns:
        (filter_chain_string, sendcmd_file_path or None)
    """
    compiler = FFmpegCompiler()
    filters = []
    all_sendcmd_lines = []

    for effect in effects:
        effect_name = effect.get("effect")
        # Everything except the "effect" key is treated as a parameter.
        params = {k: v for k, v in effect.items() if k != "effect"}

        filter_str, sendcmd, _ = compiler.compile_effect_with_bindings(
            effect_name,
            params,
            analysis_data,
            segment_start,
            segment_duration,
        )
        if filter_str:
            filters.append(filter_str)
        if sendcmd:
            all_sendcmd_lines.append(sendcmd)

    if not filters:
        return "", None

    filter_chain = ",".join(filters)

    # NOTE: sendcmd disabled - FFmpeg's sendcmd filter has compatibility issues.
    # Bindings use their initial value (sampled at segment start time).
    # This is acceptable since each segment is only ~8 seconds.
    # The binding value is still music-reactive (varies per segment).
    # (The unused `import tempfile` that supported the old sendcmd-file
    # path has been removed.)
    sendcmd_path = None

    return filter_chain, sendcmd_path

425
core/artdag/sexp/parser.py Normal file
View File

@@ -0,0 +1,425 @@
"""
S-expression parser for ArtDAG recipes and plans.
Supports:
- Lists: (a b c)
- Symbols: foo, bar-baz, ->
- Keywords: :key
- Strings: "hello world"
- Numbers: 42, 3.14, -1.5
- Comments: ; to end of line
- Vectors: [a b c] (syntactic sugar for lists)
- Maps: {:key1 val1 :key2 val2} (parsed as Python dicts)
"""
from dataclasses import dataclass
from typing import Any, Dict, List, Union
import re
@dataclass
class Symbol:
    """An unquoted symbol/identifier."""
    name: str

    def __repr__(self):
        return f"Symbol({self.name!r})"

    def __eq__(self, other):
        # Symbols compare equal to other Symbols and to bare strings,
        # so callers can write `head == "recipe"` directly.
        if isinstance(other, Symbol):
            other = other.name
        if isinstance(other, str):
            return self.name == other
        return False

    def __hash__(self):
        # Hash like the underlying string so Symbol/str keys collide
        # consistently with __eq__.
        return hash(self.name)
@dataclass
class Keyword:
    """A keyword starting with colon (e.g. ``:version``)."""
    name: str

    def __repr__(self):
        return f"Keyword({self.name!r})"

    def __eq__(self, other):
        # Unlike Symbol, keywords only ever compare equal to keywords.
        return isinstance(other, Keyword) and other.name == self.name

    def __hash__(self):
        # Tag the hash so Keyword("x") never collides with Symbol("x")/"x".
        return hash((':', self.name))
@dataclass
class Lambda:
    """A lambda/anonymous function with closure.

    Equality is field-wise via the dataclass-generated ``__eq__``.
    """
    params: List[str]  # Parameter names
    body: Any  # Expression body
    closure: Union[Dict, None] = None  # Captured environment (optional for backwards compat)

    def __repr__(self):
        return f"Lambda({self.params}, {self.body!r})"
@dataclass
class Binding:
    """A binding to analysis data for dynamic effect parameters.

    Equality is field-wise via the dataclass-generated ``__eq__``.
    """
    # NOTE(review): serialize() also handles non-str analysis_ref values
    # (node IDs, dicts), so the str annotation may be narrower than actual use.
    analysis_ref: str  # Name of analysis variable
    track: Union[str, None] = None  # Optional track name (e.g., "bass", "energy")
    range_min: float = 0.0  # Output range minimum
    range_max: float = 1.0  # Output range maximum
    transform: Union[str, None] = None  # Optional transform: "sqrt", "pow2", "log", etc.

    def __repr__(self):
        t = f", transform={self.transform!r}" if self.transform else ""
        return f"Binding({self.analysis_ref!r}, track={self.track!r}, range=[{self.range_min}, {self.range_max}]{t})"
class ParseError(Exception):
    """Error during S-expression parsing, carrying source-location info."""

    def __init__(self, message: str, position: int = 0, line: int = 1, col: int = 1):
        # Keep the raw location so callers can report or highlight it.
        self.position = position
        self.line = line
        self.col = col
        located = f"{message} at line {line}, column {col}"
        super().__init__(located)
class Tokenizer:
"""Tokenize S-expression text into tokens."""
# Token patterns
WHITESPACE = re.compile(r'\s+')
COMMENT = re.compile(r';[^\n]*')
STRING = re.compile(r'"(?:[^"\\]|\\.)*"')
NUMBER = re.compile(r'-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?')
KEYWORD = re.compile(r':[a-zA-Z_][a-zA-Z0-9_-]*')
SYMBOL = re.compile(r'[a-zA-Z_*+\-><=/!?][a-zA-Z0-9_*+\-><=/!?.:]*')
def __init__(self, text: str):
self.text = text
self.pos = 0
self.line = 1
self.col = 1
def _advance(self, count: int = 1):
"""Advance position, tracking line/column."""
for _ in range(count):
if self.pos < len(self.text):
if self.text[self.pos] == '\n':
self.line += 1
self.col = 1
else:
self.col += 1
self.pos += 1
def _skip_whitespace_and_comments(self):
"""Skip whitespace and comments."""
while self.pos < len(self.text):
# Whitespace
match = self.WHITESPACE.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
continue
# Comments
match = self.COMMENT.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
continue
break
def peek(self) -> str | None:
"""Peek at current character."""
self._skip_whitespace_and_comments()
if self.pos >= len(self.text):
return None
return self.text[self.pos]
def next_token(self) -> Any:
"""Get the next token."""
self._skip_whitespace_and_comments()
if self.pos >= len(self.text):
return None
char = self.text[self.pos]
start_line, start_col = self.line, self.col
# Single-character tokens (parens, brackets, braces)
if char in '()[]{}':
self._advance()
return char
# String
if char == '"':
match = self.STRING.match(self.text, self.pos)
if not match:
raise ParseError("Unterminated string", self.pos, self.line, self.col)
self._advance(match.end() - self.pos)
# Parse escape sequences
content = match.group()[1:-1]
content = content.replace('\\n', '\n')
content = content.replace('\\t', '\t')
content = content.replace('\\"', '"')
content = content.replace('\\\\', '\\')
return content
# Keyword
if char == ':':
match = self.KEYWORD.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
return Keyword(match.group()[1:]) # Strip leading colon
raise ParseError(f"Invalid keyword", self.pos, self.line, self.col)
# Number (must check before symbol due to - prefix)
if char.isdigit() or (char == '-' and self.pos + 1 < len(self.text) and
(self.text[self.pos + 1].isdigit() or self.text[self.pos + 1] == '.')):
match = self.NUMBER.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
num_str = match.group()
if '.' in num_str or 'e' in num_str or 'E' in num_str:
return float(num_str)
return int(num_str)
# Symbol
match = self.SYMBOL.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
return Symbol(match.group())
raise ParseError(f"Unexpected character: {char!r}", self.pos, self.line, self.col)
def parse(text: str) -> Any:
    """
    Parse a single S-expression string into Python data structures.

    Returns:
        Parsed S-expression as nested Python structures:
        - Lists become Python lists
        - Symbols become Symbol objects
        - Keywords become Keyword objects
        - Strings become Python strings
        - Numbers become int/float

    Raises:
        ParseError: if input holds more than one top-level expression.

    Example:
        >>> parse('(recipe "test" :version "1.0")')
        [Symbol('recipe'), 'test', Keyword('version'), '1.0']
    """
    tok = Tokenizer(text)
    parsed = _parse_expr(tok)
    # Anything left over means the input held more than one expression.
    if tok.peek() is not None:
        raise ParseError("Unexpected content after expression",
                         tok.pos, tok.line, tok.col)
    return parsed
def parse_all(text: str) -> List[Any]:
    """
    Parse multiple S-expressions from a string.

    Returns list of parsed expressions (empty for whitespace/comment-only input).
    """
    tok = Tokenizer(text)
    expressions: List[Any] = []
    while tok.peek() is not None:
        expressions.append(_parse_expr(tok))
    return expressions
def _parse_expr(tokenizer: Tokenizer) -> Any:
    """Parse a single expression from the token stream."""
    token = tokenizer.next_token()
    if token is None:
        raise ParseError("Unexpected end of input", tokenizer.pos, tokenizer.line, tokenizer.col)

    # Openers map to their expected closing delimiter.
    if token == '(':
        return _parse_list(tokenizer, ')')
    if token == '[':
        # Vector: syntactic sugar for a list
        return _parse_list(tokenizer, ']')
    if token == '{':
        # Map/dict: {:key1 val1 :key2 val2}
        return _parse_map(tokenizer)

    # A closer with no matching opener is a syntax error.
    if token in (')', ']', '}'):
        raise ParseError(f"Unexpected {token!r}", tokenizer.pos, tokenizer.line, tokenizer.col)

    # Anything else is already an atom (string/number/Symbol/Keyword).
    return token
def _parse_list(tokenizer: Tokenizer, closer: str) -> List[Any]:
    """Collect expressions until *closer* appears; return them as a list."""
    items: List[Any] = []
    while tokenizer.peek() != closer:
        if tokenizer.peek() is None:
            raise ParseError(f"Unterminated list, expected {closer!r}",
                             tokenizer.pos, tokenizer.line, tokenizer.col)
        items.append(_parse_expr(tokenizer))
    tokenizer.next_token()  # consume the closing delimiter
    return items
def _parse_map(tokenizer: Tokenizer) -> Dict[str, Any]:
    """Parse a map literal {:key1 val1 :key2 val2} into a plain dict.

    Keys must be keywords or strings; keyword keys are stored by name.
    """
    mapping: Dict[str, Any] = {}
    while True:
        ahead = tokenizer.peek()
        if ahead is None:
            raise ParseError("Unterminated map, expected '}'",
                             tokenizer.pos, tokenizer.line, tokenizer.col)
        if ahead == '}':
            tokenizer.next_token()  # consume the closing brace
            return mapping

        # Key must be a keyword (:key) or a plain string.
        key_expr = _parse_expr(tokenizer)
        if isinstance(key_expr, Keyword):
            key = key_expr.name
        elif isinstance(key_expr, str):
            key = key_expr
        else:
            raise ParseError(
                f"Map key must be keyword or string, got {type(key_expr).__name__}",
                tokenizer.pos, tokenizer.line, tokenizer.col)

        # Each key is immediately followed by its value expression.
        mapping[key] = _parse_expr(tokenizer)
def serialize(expr: Any, indent: int = 0, pretty: bool = False) -> str:
    """
    Serialize a Python data structure back to S-expression format.

    Args:
        expr: The expression to serialize
        indent: Current indentation level (for pretty printing)
        pretty: Whether to use pretty printing with newlines

    Returns:
        S-expression string

    Raises:
        ValueError: for types with no S-expression representation.
    """
    if isinstance(expr, list):
        if not expr:
            return "()"
        if pretty:
            return _serialize_pretty(expr, indent)
        return "(" + " ".join(serialize(item, indent, False) for item in expr) + ")"

    if isinstance(expr, Symbol):
        return expr.name
    if isinstance(expr, Keyword):
        return f":{expr.name}"

    if isinstance(expr, Lambda):
        return "(fn [{}] {})".format(" ".join(expr.params),
                                     serialize(expr.body, indent, pretty))

    if isinstance(expr, Binding):
        # analysis_ref can be a string, node ID, or dict - serialize it properly
        if isinstance(expr.analysis_ref, str):
            ref = f'"{expr.analysis_ref}"'
        else:
            ref = serialize(expr.analysis_ref, indent, pretty)
        out = f"(bind {ref} :range [{expr.range_min} {expr.range_max}]"
        if expr.transform:
            out += f" :transform {expr.transform}"
        return out + ")"

    if isinstance(expr, str):
        # Backslash is escaped first so later escapes aren't double-processed.
        escaped = (expr.replace('\\', '\\\\')
                       .replace('"', '\\"')
                       .replace('\n', '\\n')
                       .replace('\t', '\\t'))
        return f'"{escaped}"'

    # bool must be tested before int: True/False are int instances.
    if isinstance(expr, bool):
        return "true" if expr else "false"
    if isinstance(expr, (int, float)):
        return str(expr)
    if expr is None:
        return "nil"

    if isinstance(expr, dict):
        # Serialize dict as property list: {:key1 val1 :key2 val2}
        pieces: List[str] = []
        for k, v in expr.items():
            pieces.append(f":{k}")
            pieces.append(serialize(v, indent, pretty))
        return "{" + " ".join(pieces) + "}"

    raise ValueError(f"Cannot serialize {type(expr).__name__}: {expr!r}")
def _serialize_pretty(expr: List, indent: int) -> str:
    """Pretty-print a list expression with smart formatting.

    Short expressions stay on one line; longer ones are broken across
    lines with :keyword/value pairs kept together.  The unused `prefix`
    local from the original has been removed.
    """
    if not expr:
        return "()"

    inner_prefix = " " * (indent + 1)

    # Check if this is a simple list that fits on one line
    simple = serialize(expr, indent, False)
    if len(simple) < 60 and '\n' not in simple:
        return simple

    # Start building multiline output
    head = serialize(expr[0], indent + 1, False)
    parts = [f"({head}"]

    i = 1
    while i < len(expr):
        item = expr[i]
        # Group keyword-value pairs on same line
        if isinstance(item, Keyword) and i + 1 < len(expr):
            key = serialize(item, 0, False)
            val = serialize(expr[i + 1], indent + 1, False)
            # If value is short, put on same line
            if len(val) < 50 and '\n' not in val:
                parts.append(f"{inner_prefix}{key} {val}")
            else:
                # Value is complex, serialize it pretty
                val_pretty = serialize(expr[i + 1], indent + 1, True)
                parts.append(f"{inner_prefix}{key} {val_pretty}")
            i += 2
        else:
            # Regular item
            item_str = serialize(item, indent + 1, True)
            parts.append(f"{inner_prefix}{item_str}")
            i += 1

    return "\n".join(parts) + ")"

2187
core/artdag/sexp/planner.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,620 @@
"""
Frame processing primitives for sexp effects.
These primitives are called by sexp effect definitions and operate on
numpy arrays (frames). They're used when falling back to Python rendering
instead of FFmpeg.
Required: numpy, PIL
"""
import math
from typing import Any, Dict, List, Optional, Tuple
try:
import numpy as np
HAS_NUMPY = True
except ImportError:
HAS_NUMPY = False
np = None
try:
from PIL import Image, ImageDraw, ImageFont
HAS_PIL = True
except ImportError:
HAS_PIL = False
# ASCII character sets for different styles.
# Each string is a luminance ramp ordered darkest (left) to brightest
# (right), so an index proportional to brightness picks a matching glyph.
ASCII_ALPHABETS = {
    "standard": " .:-=+*#%@",
    "blocks": " ░▒▓█",
    "simple": " .-:+=xX#",
    "detailed": " .'`^\",:;Il!i><~+_-?][}{1)(|\\/tfjrxnuvczXYUJCLQ0OZmwqpdbkhao*#MW&8%B@$",
    # NOTE(review): empty charset — indexing into it (luminance_to_chars /
    # alphabet_char) raises IndexError. Presumably a placeholder; confirm.
    "binary": "",
}
def check_deps():
    """Ensure the optional numpy and Pillow dependencies are importable.

    Raises:
        ImportError: If numpy or PIL is missing (numpy checked first).
    """
    if not HAS_NUMPY:
        raise ImportError("numpy required for frame primitives: pip install numpy")
    if not HAS_PIL:
        raise ImportError("PIL required for frame primitives: pip install Pillow")
def frame_to_image(frame: np.ndarray) -> Image.Image:
    """Convert a numpy frame (H, W, C) to a PIL Image.

    Non-uint8 input is clipped to [0, 255] and cast before conversion.
    """
    check_deps()
    needs_cast = frame.dtype != np.uint8
    data = np.clip(frame, 0, 255).astype(np.uint8) if needs_cast else frame
    return Image.fromarray(data)
def image_to_frame(img: Image.Image) -> np.ndarray:
    """Convert a PIL Image to a numpy frame (copies the pixel data)."""
    check_deps()
    frame = np.array(img)
    return frame
# ============================================================================
# ASCII Art Primitives
# ============================================================================
def cell_sample(frame: np.ndarray, cell_size: int = 8) -> Tuple[np.ndarray, np.ndarray]:
    """
    Sample frame into cells, returning average colors and luminances.

    Pixels beyond the last full cell (when dimensions are not exact
    multiples of cell_size) are ignored, matching the original per-cell
    loop behavior.

    Args:
        frame: Input frame (H, W, C) with at least 3 channels
        cell_size: Size of each cell in pixels

    Returns:
        (colors, luminances) - colors is (rows, cols, 3) float32,
        luminances is (rows, cols) float32 (ITU-R BT.601 weighting)
    """
    check_deps()
    h, w = frame.shape[:2]
    rows = h // cell_size
    cols = w // cell_size
    # Vectorized block averaging: crop to whole cells, reshape so each
    # cell gets its own (cell_size, cell_size) axis pair, then average
    # over it. Replaces the original O(rows*cols) Python loop with a
    # single C-level pass; results are identical since the loop averaged
    # only the first 3 channels into colors/luminance.
    cropped = frame[:rows * cell_size, :cols * cell_size, :3].astype(np.float32)
    colors = cropped.reshape(rows, cell_size, cols, cell_size, 3).mean(axis=(1, 3))
    # Luminance (ITU-R BT.601)
    luminances = (
        0.299 * colors[:, :, 0] + 0.587 * colors[:, :, 1] + 0.114 * colors[:, :, 2]
    ).astype(np.float32)
    return colors, luminances
def luminance_to_chars(
    luminances: np.ndarray,
    alphabet: str = "standard",
    contrast: float = 1.0,
) -> List[List[str]]:
    """
    Convert luminance values to ASCII characters.

    Args:
        luminances: 2D array of luminance values (0-255)
        alphabet: Name of a set in ASCII_ALPHABETS, or a custom string
        contrast: Contrast multiplier applied around the 128 midpoint

    Returns:
        2D list of characters

    Raises:
        ValueError: If the resolved character set is empty (e.g. the
            "binary" placeholder alphabet). The original code crashed
            with an obscure ``chars[-1]`` IndexError in that case.
    """
    check_deps()
    chars = ASCII_ALPHABETS.get(alphabet, alphabet)
    n_chars = len(chars)
    if n_chars == 0:
        raise ValueError(f"alphabet {alphabet!r} resolves to an empty character set")
    rows, cols = luminances.shape
    result = []
    for r in range(rows):
        row_chars = []
        for c in range(cols):
            lum = luminances[r, c]
            # Apply contrast around midpoint
            lum = 128 + (lum - 128) * contrast
            lum = np.clip(lum, 0, 255)
            # Map to character index (clamped so lum == 255 stays in range)
            idx = int(lum / 256 * n_chars)
            idx = min(idx, n_chars - 1)
            row_chars.append(chars[idx])
        result.append(row_chars)
    return result
def render_char_grid(
    frame: np.ndarray,
    chars: List[List[str]],
    colors: np.ndarray,
    char_size: int = 8,
    color_mode: str = "color",
    background: Tuple[int, int, int] = (0, 0, 0),
) -> np.ndarray:
    """
    Render a character grid onto an image the size of the original frame.

    Args:
        frame: Original frame (used only for output dimensions)
        chars: 2D list of characters
        colors: Per-cell RGB colors, shape (rows, cols, 3)
        char_size: Pixel size of each character cell (also the font size)
        color_mode: "color" (per-cell), "white", or "green"
        background: Background RGB color

    Returns:
        Rendered frame as a numpy array
    """
    check_deps()
    h, w = frame.shape[:2]
    rows = len(chars)
    cols = len(chars[0]) if chars else 0
    canvas = Image.new("RGB", (w, h), background)
    draw = ImageDraw.Draw(canvas)
    # Prefer a monospace TrueType font; fall back to PIL's builtin font.
    font = None
    for font_path in (
        "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf",
    ):
        try:
            font = ImageFont.truetype(font_path, char_size)
            break
        except (IOError, OSError):
            continue
    if font is None:
        font = ImageFont.load_default()
    for row_idx in range(rows):
        for col_idx in range(cols):
            glyph = chars[row_idx][col_idx]
            if glyph == ' ':
                # Spaces are just background; skip the draw call.
                continue
            if color_mode == "color":
                fill = tuple(int(v) for v in colors[row_idx, col_idx])
            elif color_mode == "green":
                fill = (0, 255, 0)
            else:  # white
                fill = (255, 255, 255)
            draw.text(
                (col_idx * char_size, row_idx * char_size),
                glyph,
                fill=fill,
                font=font,
            )
    return np.array(canvas)
def ascii_art_frame(
    frame: np.ndarray,
    char_size: int = 8,
    alphabet: str = "standard",
    color_mode: str = "color",
    contrast: float = 1.5,
    background: Tuple[int, int, int] = (0, 0, 0),
) -> np.ndarray:
    """
    Apply ASCII art effect to a frame.

    Pipeline: sample the frame into cells, map cell luminance to
    characters, then render the character grid back to an image.

    This is the main entry point for the ascii_art effect.
    """
    check_deps()
    cell_colors, cell_lums = cell_sample(frame, char_size)
    grid = luminance_to_chars(cell_lums, alphabet, contrast)
    return render_char_grid(frame, grid, cell_colors, char_size, color_mode, background)
# ============================================================================
# ASCII Zones Primitives
# ============================================================================
def ascii_zones_frame(
    frame: np.ndarray,
    char_size: int = 8,
    zone_threshold: int = 128,
    dark_chars: str = " .-:",
    light_chars: str = "=+*#",
) -> np.ndarray:
    """
    Apply zone-based ASCII art effect.

    Cells darker than zone_threshold use dark_chars; the rest use
    light_chars, with each cell's luminance re-normalized within its zone.

    Args:
        frame: Input frame (H, W, C)
        char_size: Cell/character size in pixels
        zone_threshold: Luminance cutoff (0-255) between the two zones
        dark_chars: Character ramp for dark cells (must be non-empty)
        light_chars: Character ramp for light cells (must be non-empty)

    Returns:
        Rendered frame

    Raises:
        ValueError: If either character set is empty (previously an
            IndexError deep inside the loop).
    """
    check_deps()
    if not dark_chars or not light_chars:
        raise ValueError("dark_chars and light_chars must be non-empty")
    colors, luminances = cell_sample(frame, char_size)
    rows, cols = luminances.shape
    chars = []
    for r in range(rows):
        row_chars = []
        for c in range(cols):
            lum = luminances[r, c]
            if lum < zone_threshold:
                # Dark zone: normalize luminance within [0, zone_threshold)
                charset = dark_chars
                local_lum = lum / zone_threshold  # 0-1 within zone
            else:
                # Light zone: normalize within [zone_threshold, 255].
                # max(..., 1) guards the ZeroDivisionError the original hit
                # when zone_threshold >= 255.
                charset = light_chars
                local_lum = (lum - zone_threshold) / max(255 - zone_threshold, 1)
            idx = int(local_lum * len(charset))
            idx = min(idx, len(charset) - 1)
            row_chars.append(charset[idx])
        chars.append(row_chars)
    return render_char_grid(frame, chars, colors, char_size, "color", (0, 0, 0))
# ============================================================================
# Kaleidoscope Primitives (Python fallback)
# ============================================================================
def kaleidoscope_displace(
    w: int,
    h: int,
    segments: int = 6,
    rotation: float = 0,
    cx: Optional[float] = None,
    cy: Optional[float] = None,
    zoom: float = 1.0,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Compute kaleidoscope displacement coordinates.

    Folds the angular coordinate around (cx, cy) into one of `segments`
    mirrored wedges, producing sampling coordinates for remap().

    Args:
        w: Output width in pixels
        h: Output height in pixels
        segments: Number of mirror segments
        rotation: Pre-fold rotation in degrees
        cx: Center x in pixels (defaults to w / 2)
        cy: Center y in pixels (defaults to h / 2)
        zoom: Radial zoom factor (> 1 zooms in)

    Returns:
        (x_coords, y_coords) float32 arrays of shape (h, w) for remapping.
    """
    check_deps()
    if cx is None:
        cx = w / 2
    if cy is None:
        cy = h / 2
    # Create coordinate grids
    y_grid, x_grid = np.mgrid[0:h, 0:w].astype(np.float32)
    # Center coordinates
    x_centered = x_grid - cx
    y_centered = y_grid - cy
    # Convert to polar (radius shrunk by zoom so larger zoom samples a
    # smaller source region)
    r = np.sqrt(x_centered**2 + y_centered**2) / zoom
    theta = np.arctan2(y_centered, x_centered)
    # Apply rotation
    theta = theta - np.radians(rotation)
    # Kaleidoscope: fold angle into segment (mirror about segment center)
    segment_angle = 2 * np.pi / segments
    theta = np.abs(np.mod(theta, segment_angle) - segment_angle / 2)
    # Convert back to cartesian
    x_new = r * np.cos(theta) + cx
    y_new = r * np.sin(theta) + cy
    return x_new, y_new
def remap(
    frame: np.ndarray,
    x_coords: np.ndarray,
    y_coords: np.ndarray,
) -> np.ndarray:
    """
    Sample `frame` at the given coordinate arrays (bilinear interpolation).

    Coordinates are clipped to the frame bounds; out-of-range sampling
    uses scipy's 'reflect' mode.
    """
    check_deps()
    from scipy import ndimage
    h, w = frame.shape[:2]
    xs = np.clip(x_coords, 0, w - 1)
    ys = np.clip(y_coords, 0, h - 1)
    coords = [ys, xs]
    if frame.ndim != 3:
        # Single-channel frame: remap directly.
        return ndimage.map_coordinates(frame, coords, order=1, mode='reflect')
    # Multi-channel frame: remap each channel independently.
    out = np.zeros_like(frame)
    for ch in range(frame.shape[2]):
        out[:, :, ch] = ndimage.map_coordinates(
            frame[:, :, ch],
            coords,
            order=1,
            mode='reflect',
        )
    return out
def kaleidoscope_frame(
    frame: np.ndarray,
    segments: int = 6,
    rotation: float = 0,
    center_x: float = 0.5,
    center_y: float = 0.5,
    zoom: float = 1.0,
) -> np.ndarray:
    """
    Apply a kaleidoscope effect to a frame.

    center_x / center_y are fractions of the frame size. This is the
    Python fallback - the FFmpeg version is faster.
    """
    check_deps()
    h, w = frame.shape[:2]
    xs, ys = kaleidoscope_displace(
        w, h, segments, rotation, w * center_x, h * center_y, zoom
    )
    return remap(frame, xs, ys)
# ============================================================================
# Datamosh Primitives (simplified Python version)
# ============================================================================
def datamosh_frame(
    frame: np.ndarray,
    prev_frame: Optional[np.ndarray],
    block_size: int = 32,
    corruption: float = 0.3,
    max_offset: int = 50,
    color_corrupt: bool = True,
) -> np.ndarray:
    """
    Simplified datamosh effect using block displacement.

    Randomly replaces blocks with offset blocks sourced from the previous
    frame, optionally rolling color channels. Uses numpy's global RNG.
    This is a basic approximation - real datamosh works on compressed video.
    """
    check_deps()
    if prev_frame is None:
        return frame.copy()
    h, w = frame.shape[:2]
    out = frame.copy()
    for by in range(0, h - block_size, block_size):
        for bx in range(0, w - block_size, block_size):
            if np.random.random() >= corruption:
                continue
            # Displace this block, sourcing from the previous frame.
            dx = np.random.randint(-max_offset, max_offset + 1)
            dy = np.random.randint(-max_offset, max_offset + 1)
            sy = np.clip(by + dy, 0, h - block_size)
            sx = np.clip(bx + dx, 0, w - block_size)
            patch = prev_frame[sy:sy + block_size, sx:sx + block_size]
            # Occasionally shift color channels for extra glitchiness.
            if color_corrupt and np.random.random() < 0.3:
                patch = np.roll(patch, np.random.randint(1, 3), axis=2)
            out[by:by + block_size, bx:bx + block_size] = patch
    return out
# ============================================================================
# Pixelsort Primitives (Python version)
# ============================================================================
def pixelsort_frame(
    frame: np.ndarray,
    sort_by: str = "lightness",
    threshold_low: float = 50,
    threshold_high: float = 200,
    angle: float = 0,
    reverse: bool = False,
) -> np.ndarray:
    """
    Apply pixel sorting effect to a frame.

    Pixels whose sort key falls within [threshold_low, threshold_high] are
    grouped into horizontal runs and each run is sorted by that key.

    Args:
        frame: Input frame (H, W, C); RGB channel order assumed
        sort_by: "lightness", "hue", "saturation", or anything else for
            the raw red channel
        threshold_low: Lower bound of the sortable key range
        threshold_high: Upper bound of the sortable key range
        angle: Rotate the frame by this many degrees before sorting
            (rotated back afterwards) to change the sort direction
        reverse: Sort runs in descending key order

    Returns:
        Frame with sorted pixel runs
    """
    check_deps()
    from scipy import ndimage
    # Rotate if needed
    if angle != 0:
        frame = ndimage.rotate(frame, -angle, reshape=False, mode='reflect')
    h, w = frame.shape[:2]
    result = frame.copy()
    # Compute sort key
    if sort_by == "lightness":
        # ITU-R BT.601 luma weights
        key = 0.299 * frame[:,:,0] + 0.587 * frame[:,:,1] + 0.114 * frame[:,:,2]
    elif sort_by == "hue":
        # Simple hue approximation
        # NOTE(review): arctan2 yields radians in [-pi, pi]; the default
        # thresholds (50/200) would then select no pixels — confirm callers
        # pass hue-scaled thresholds.
        key = np.arctan2(
            np.sqrt(3) * (frame[:,:,1].astype(float) - frame[:,:,2]),
            2 * frame[:,:,0].astype(float) - frame[:,:,1] - frame[:,:,2]
        )
    elif sort_by == "saturation":
        # NOTE(review): saturation is in [0, 1]; same threshold caveat.
        mx = frame.max(axis=2).astype(float)
        mn = frame.min(axis=2).astype(float)
        key = np.where(mx > 0, (mx - mn) / mx, 0)
    else:
        key = frame[:,:,0]  # Red channel
    # Sort each row
    for y in range(h):
        row = result[y]
        row_key = key[y]
        # Find sortable intervals (pixels within threshold)
        mask = (row_key >= threshold_low) & (row_key <= threshold_high)
        # Find runs of True in mask
        runs = []
        start = None
        for x in range(w):
            if mask[x] and start is None:
                start = x
            elif not mask[x] and start is not None:
                # Only keep runs of length >= 2 (sorting one pixel is a no-op)
                if x - start > 1:
                    runs.append((start, x))
                start = None
        # Close a run that extends to the end of the row
        if start is not None and w - start > 1:
            runs.append((start, w))
        # Sort each run in place by its key
        for start, end in runs:
            indices = np.argsort(row_key[start:end])
            if reverse:
                indices = indices[::-1]
            result[y, start:end] = row[start:end][indices]
    # Rotate back
    if angle != 0:
        result = ndimage.rotate(result, angle, reshape=False, mode='reflect')
    return result
# ============================================================================
# Primitive Registry
# ============================================================================
def map_char_grid(
    chars,
    luminances,
    fn,
):
    """
    Map a function over each cell of a character grid.

    Args:
        chars: 2D array/list of characters (rows, cols); rows may be
            strings, lists/tuples, or numpy arrays
        luminances: 2D array (or 1D per-row sequence) of luminance values
        fn: Function or sexp Lambda called as (row, col, char, luminance)
            -> new_char

    Returns:
        New character grid with mapped values (list of lists)
    """
    from .parser import Lambda
    from .evaluator import evaluate
    # NOTE(review): uses np directly without check_deps(); if numpy is
    # missing, np is None and np.ndarray raises AttributeError — confirm
    # callers always run after check_deps().
    # Handle both list and numpy array inputs
    if isinstance(chars, np.ndarray):
        rows, cols = chars.shape[:2]
    else:
        rows = len(chars)
        cols = len(chars[0]) if rows > 0 and isinstance(chars[0], (list, tuple, str)) else 1
    # Get luminances as 2D
    if isinstance(luminances, np.ndarray):
        lum_arr = luminances
    else:
        lum_arr = np.array(luminances)
    # Check if fn is a Lambda (from sexp) or a Python callable
    is_lambda = isinstance(fn, Lambda)
    result = []
    for r in range(rows):
        row_result = []
        for c in range(cols):
            # Get character; out-of-range columns default to a space
            if isinstance(chars, np.ndarray):
                ch = chars[r, c] if len(chars.shape) > 1 else chars[r]
            elif isinstance(chars[r], str):
                ch = chars[r][c] if c < len(chars[r]) else ' '
            else:
                ch = chars[r][c] if c < len(chars[r]) else ' '
            # Get luminance (2D grid or one value per row)
            if len(lum_arr.shape) > 1:
                lum = lum_arr[r, c]
            else:
                lum = lum_arr[r]
            # Call the function
            if is_lambda:
                # Evaluate the Lambda with its parameters bound in a copy
                # of the closure environment.
                call_env = dict(fn.closure) if fn.closure else {}
                for param, val in zip(fn.params, [r, c, ch, float(lum)]):
                    call_env[param] = val
                new_ch = evaluate(fn.body, call_env)
            else:
                new_ch = fn(r, c, ch, float(lum))
            row_result.append(new_ch)
        result.append(row_result)
    return result
def alphabet_char(alphabet: str, index: int) -> str:
    """
    Look up a character in an alphabet by index.

    Args:
        alphabet: Alphabet name (from ASCII_ALPHABETS) or a literal string
        index: Index into the alphabet (clamped to the valid range)

    Returns:
        The character at the clamped index
    """
    # Resolve a named alphabet; otherwise treat the argument as the
    # character ramp itself.
    chars = ASCII_ALPHABETS[alphabet] if alphabet in ASCII_ALPHABETS else alphabet
    clamped = max(0, min(int(index), len(chars) - 1))
    return chars[clamped]
# Registry mapping sexp primitive names to their Python implementations.
# Hyphenated keys are the sexp-facing spellings; underscore *_frame keys
# alias the Python function names so both forms resolve.
PRIMITIVES = {
    # ASCII
    "cell-sample": cell_sample,
    "luminance-to-chars": luminance_to_chars,
    "render-char-grid": render_char_grid,
    "map-char-grid": map_char_grid,
    "alphabet-char": alphabet_char,
    "ascii_art_frame": ascii_art_frame,
    "ascii_zones_frame": ascii_zones_frame,
    # Kaleidoscope
    "kaleidoscope-displace": kaleidoscope_displace,
    "remap": remap,
    "kaleidoscope_frame": kaleidoscope_frame,
    # Datamosh
    "datamosh": datamosh_frame,
    "datamosh_frame": datamosh_frame,
    # Pixelsort
    "pixelsort": pixelsort_frame,
    "pixelsort_frame": pixelsort_frame,
}
def get_primitive(name: str):
    """Look up a primitive function by name; returns None if unknown."""
    return PRIMITIVES.get(name, None)
def list_primitives() -> List[str]:
    """Return the names of all registered primitives."""
    return [name for name in PRIMITIVES]

View File

@@ -0,0 +1,779 @@
"""
Celery scheduler for S-expression execution plans.
Distributes plan steps to workers as S-expressions.
The S-expression is the canonical format - workers receive
serialized S-expressions and can verify cache_ids by hashing them.
Usage:
from artdag.sexp import compile_string, create_plan
from artdag.sexp.scheduler import schedule_plan
recipe = compile_string(sexp_content)
plan = create_plan(recipe, inputs={'video': 'abc123...'})
result = schedule_plan(plan)
"""
import hashlib
import json
import logging
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Callable
from .parser import Symbol, Keyword, serialize, parse
from .planner import ExecutionPlanSexp, PlanStep
logger = logging.getLogger(__name__)
@dataclass
class StepResult:
    """Result from executing a single plan step."""
    step_id: str                        # plan-local step identifier
    cache_id: str                       # content hash of the step's S-expression
    status: str                         # 'completed', 'cached', 'failed', 'pending'
    output_path: Optional[str] = None   # local path of the produced artifact, if any
    error: Optional[str] = None         # error message when status == 'failed'
    ipfs_cid: Optional[str] = None      # IPFS CID when the artifact was pinned
@dataclass
class PlanResult:
    """Result from executing a complete plan."""
    plan_id: str                              # hash identifying the plan
    status: str                               # 'completed', 'failed', 'partial'
    steps_completed: int = 0                  # steps executed by workers
    steps_cached: int = 0                     # steps satisfied from cache
    steps_failed: int = 0                     # steps that errored
    output_cache_id: Optional[str] = None     # cache_id of the plan's output step
    output_path: Optional[str] = None         # local path of the final artifact
    output_ipfs_cid: Optional[str] = None     # IPFS CID of the final artifact
    step_results: Dict[str, StepResult] = field(default_factory=dict)  # step_id -> result
    error: Optional[str] = None               # first error encountered, if any
def step_to_sexp(step: PlanStep) -> List:
    """
    Convert a PlanStep to the minimal S-expression sent to workers.

    This is the canonical form: a worker can verify the step's cache_id
    by hashing exactly this expression.
    """
    expr: List = [Symbol(step.node_type.lower())]
    # Config entries become keyword arguments (snake_case -> kebab-case).
    for cfg_key, cfg_val in step.config.items():
        expr.append(Keyword(cfg_key.replace('_', '-')))
        expr.append(cfg_val)
    # Inputs are passed as cache IDs (content hashes), not step IDs.
    if step.inputs:
        expr.append(Keyword("inputs"))
        expr.append(step.inputs)
    return expr
def step_sexp_to_string(step: PlanStep) -> str:
    """Serialize a step to its S-expression string form for a Celery task."""
    return serialize(step_to_sexp(step))
def verify_step_cache_id(step_sexp: str, expected_cache_id: str, cluster_key: Optional[str] = None) -> bool:
    """
    Verify that a step's cache_id matches its S-expression.

    The cache_id is the SHA3-256 hex digest of the canonical JSON encoding
    of ``{"sexp": step_sexp}``, optionally wrapped as
    ``{"_cluster_key": cluster_key, "_data": {...}}``. Workers should call
    this to verify they're executing the correct task.

    Args:
        step_sexp: Serialized S-expression for the step.
        expected_cache_id: Cache ID claimed by the scheduler.
        cluster_key: Optional cluster namespace key mixed into the hash.
            (Annotation fixed: was ``str = None``.)

    Returns:
        True if the computed hash equals expected_cache_id.
    """
    data: Dict[str, Any] = {"sexp": step_sexp}
    if cluster_key:
        data = {"_cluster_key": cluster_key, "_data": data}
    # Canonical JSON (sorted keys, no whitespace) gives a stable hash input.
    json_str = json.dumps(data, sort_keys=True, separators=(",", ":"))
    computed = hashlib.sha3_256(json_str.encode()).hexdigest()
    return computed == expected_cache_id
class PlanScheduler:
    """
    Schedules execution of S-expression plans on Celery workers.

    The scheduler:
    1. Groups steps by dependency level
    2. Checks cache for already-computed results
    3. Dispatches uncached steps to workers
    4. Waits for completion before proceeding to next level
    """

    def __init__(
        self,
        cache_manager=None,
        celery_app=None,
        execute_task_name: str = 'tasks.execute_step_sexp',
    ):
        """
        Initialize the scheduler.

        Args:
            cache_manager: L1 cache manager for checking cached results
            celery_app: Celery application instance
            execute_task_name: Name of the Celery task for step execution
        """
        self.cache_manager = cache_manager
        self.celery_app = celery_app
        self.execute_task_name = execute_task_name

    def schedule(
        self,
        plan: ExecutionPlanSexp,
        timeout: int = 3600,
    ) -> PlanResult:
        """
        Schedule and execute a plan.

        Executes level by level: all uncached steps of a level are
        dispatched as one Celery group and must complete before the next
        level starts. The first failed step aborts the plan.

        Args:
            plan: The execution plan (S-expression format)
            timeout: Timeout in seconds for the entire plan

        Returns:
            PlanResult with execution results
        """
        from celery import group
        logger.info(f"Scheduling plan {plan.plan_id[:16]}... ({len(plan.steps)} steps)")
        # Build step lookup and group by level
        steps_by_id = {s.step_id: s for s in plan.steps}
        steps_by_level = self._group_by_level(plan.steps)
        max_level = max(steps_by_level.keys()) if steps_by_level else 0
        # Track results
        result = PlanResult(
            plan_id=plan.plan_id,
            status="pending",
        )
        # Map step_id -> cache_id for resolving inputs
        cache_ids = dict(plan.inputs)  # Start with input hashes
        for step in plan.steps:
            cache_ids[step.step_id] = step.cache_id
        # Execute level by level
        for level in range(max_level + 1):
            level_steps = steps_by_level.get(level, [])
            if not level_steps:
                continue
            logger.info(f"Level {level}: {len(level_steps)} steps")
            # Check cache for each step; cached steps are recorded and skipped
            steps_to_run = []
            for step in level_steps:
                if self._is_cached(step.cache_id):
                    result.steps_cached += 1
                    result.step_results[step.step_id] = StepResult(
                        step_id=step.step_id,
                        cache_id=step.cache_id,
                        status="cached",
                        output_path=self._get_cached_path(step.cache_id),
                    )
                else:
                    steps_to_run.append(step)
            if not steps_to_run:
                logger.info(f"Level {level}: all {len(level_steps)} steps cached")
                continue
            # Dispatch uncached steps to workers
            logger.info(f"Level {level}: dispatching {len(steps_to_run)} steps")
            tasks = []
            for step in steps_to_run:
                # Build task arguments; inputs are resolved to cache IDs
                step_sexp = step_sexp_to_string(step)
                input_cache_ids = {
                    inp: cache_ids.get(inp, inp)
                    for inp in step.inputs
                }
                task = self._get_execute_task().s(
                    step_sexp=step_sexp,
                    step_id=step.step_id,
                    cache_id=step.cache_id,
                    plan_id=plan.plan_id,
                    input_cache_ids=input_cache_ids,
                )
                tasks.append(task)
            # Execute the whole level in parallel as one Celery group
            job = group(tasks)
            async_result = job.apply_async()
            try:
                step_results = async_result.get(timeout=timeout)
            except Exception as e:
                logger.error(f"Level {level} failed: {e}")
                result.status = "failed"
                result.error = f"Level {level} failed: {e}"
                return result
            # Process results returned by the workers
            for step_result in step_results:
                step_id = step_result.get("step_id")
                status = step_result.get("status")
                result.step_results[step_id] = StepResult(
                    step_id=step_id,
                    cache_id=step_result.get("cache_id"),
                    status=status,
                    output_path=step_result.get("output_path"),
                    error=step_result.get("error"),
                    ipfs_cid=step_result.get("ipfs_cid"),
                )
                if status in ("completed", "cached", "completed_by_other"):
                    result.steps_completed += 1
                elif status == "failed":
                    # Abort the whole plan on the first failed step
                    result.steps_failed += 1
                    result.status = "failed"
                    result.error = step_result.get("error")
                    return result
        # Get final output from the plan's designated output step
        output_step = steps_by_id.get(plan.output_step_id)
        if output_step:
            output_result = result.step_results.get(output_step.step_id)
            if output_result:
                result.output_cache_id = output_step.cache_id
                result.output_path = output_result.output_path
                result.output_ipfs_cid = output_result.ipfs_cid
        result.status = "completed"
        logger.info(
            f"Plan {plan.plan_id[:16]}... completed: "
            f"{result.steps_completed} executed, {result.steps_cached} cached"
        )
        return result

    def _group_by_level(self, steps: List[PlanStep]) -> Dict[int, List[PlanStep]]:
        """Group steps by dependency level."""
        by_level = {}
        for step in steps:
            by_level.setdefault(step.level, []).append(step)
        return by_level

    def _is_cached(self, cache_id: str) -> bool:
        """Check if a cache_id exists in cache."""
        if self.cache_manager is None:
            return False
        path = self.cache_manager.get_by_cid(cache_id)
        return path is not None

    def _get_cached_path(self, cache_id: str) -> Optional[str]:
        """Get the path for a cached item."""
        if self.cache_manager is None:
            return None
        path = self.cache_manager.get_by_cid(cache_id)
        return str(path) if path else None

    def _get_execute_task(self):
        """Get the Celery task for step execution.

        Raises:
            RuntimeError: If no Celery app was configured.
        """
        if self.celery_app is None:
            raise RuntimeError("Celery app not configured")
        return self.celery_app.tasks[self.execute_task_name]
def create_scheduler(cache_manager=None, celery_app=None) -> PlanScheduler:
    """
    Build a PlanScheduler, filling in missing dependencies when possible.

    When celery_app or cache_manager is not supplied, a best-effort import
    from the art-celery project is attempted; on failure the dependency is
    simply left as None.
    """
    if celery_app is None:
        try:
            from celery_app import app as celery_app
        except ImportError:
            pass
    if cache_manager is None:
        try:
            from cache_manager import get_cache_manager
            cache_manager = get_cache_manager()
        except ImportError:
            pass
    return PlanScheduler(cache_manager=cache_manager, celery_app=celery_app)
def schedule_plan(
    plan: ExecutionPlanSexp,
    cache_manager=None,
    celery_app=None,
    timeout: int = 3600,
) -> PlanResult:
    """
    Convenience wrapper: build a scheduler and execute the plan.

    Args:
        plan: The execution plan
        cache_manager: Optional cache manager
        celery_app: Optional Celery app
        timeout: Execution timeout in seconds

    Returns:
        PlanResult
    """
    return create_scheduler(cache_manager, celery_app).schedule(plan, timeout=timeout)
# Stage-aware scheduling
@dataclass
class StageResult:
    """Result from executing a stage."""
    stage_name: str     # name of the stage within the plan
    cache_id: str       # content hash identifying the stage
    status: str         # 'completed', 'cached', 'failed', 'pending'
    step_results: Dict[str, StepResult] = field(default_factory=dict)  # step_id -> result
    outputs: Dict[str, str] = field(default_factory=dict)  # binding_name -> cache_id
    error: Optional[str] = None  # error message when status == 'failed'
@dataclass
class StagePlanResult:
    """Result from executing a plan with stages."""
    plan_id: str                          # hash identifying the plan
    status: str                           # 'completed', 'failed', 'partial'
    stages_completed: int = 0             # stages executed
    stages_cached: int = 0                # stages satisfied from the stage cache
    stages_failed: int = 0                # stages that errored
    steps_completed: int = 0              # individual steps executed
    steps_cached: int = 0                 # individual steps satisfied from cache
    steps_failed: int = 0                 # individual steps that errored
    stage_results: Dict[str, StageResult] = field(default_factory=dict)  # stage_name -> result
    output_cache_id: Optional[str] = None  # cache_id of the final stage
    output_path: Optional[str] = None      # local path of the final artifact
    error: Optional[str] = None            # first error encountered, if any
class StagePlanScheduler:
    """
    Stage-aware scheduler for S-expression plans.

    The scheduler:
    1. Groups stages by level (parallel groups)
    2. For each stage level:
       - Check stage cache, skip entire stage if hit
       - Execute stage steps (grouped by level within stage)
       - Cache stage outputs
    3. Stages at same level can run in parallel
    """

    def __init__(
        self,
        cache_manager=None,
        stage_cache=None,
        celery_app=None,
        execute_task_name: str = 'tasks.execute_step_sexp',
    ):
        """
        Initialize the stage-aware scheduler.

        Args:
            cache_manager: L1 cache manager for step-level caching
            stage_cache: StageCache instance for stage-level caching
            celery_app: Celery application instance
            execute_task_name: Name of the Celery task for step execution
        """
        self.cache_manager = cache_manager
        self.stage_cache = stage_cache
        self.celery_app = celery_app
        self.execute_task_name = execute_task_name

    def schedule(
        self,
        plan: ExecutionPlanSexp,
        timeout: int = 3600,
    ) -> StagePlanResult:
        """
        Schedule and execute a plan with stage awareness.

        If the plan has stages, uses stage-level scheduling.
        Otherwise, falls back to step-level scheduling.

        Args:
            plan: The execution plan (S-expression format)
            timeout: Timeout in seconds for the entire plan

        Returns:
            StagePlanResult with execution results
        """
        # If no stages, use regular scheduling and adapt the result type
        if not plan.stage_plans:
            logger.info("Plan has no stages, using step-level scheduling")
            regular_scheduler = PlanScheduler(
                cache_manager=self.cache_manager,
                celery_app=self.celery_app,
                execute_task_name=self.execute_task_name,
            )
            step_result = regular_scheduler.schedule(plan, timeout)
            return StagePlanResult(
                plan_id=step_result.plan_id,
                status=step_result.status,
                steps_completed=step_result.steps_completed,
                steps_cached=step_result.steps_cached,
                steps_failed=step_result.steps_failed,
                output_cache_id=step_result.output_cache_id,
                output_path=step_result.output_path,
                error=step_result.error,
            )
        logger.info(
            f"Scheduling plan {plan.plan_id[:16]}... "
            f"({len(plan.stage_plans)} stages, {len(plan.steps)} steps)"
        )
        result = StagePlanResult(
            plan_id=plan.plan_id,
            status="pending",
        )
        # Group stages by level
        stages_by_level = self._group_stages_by_level(plan.stage_plans)
        max_level = max(stages_by_level.keys()) if stages_by_level else 0
        # Track stage outputs for data flow between stages
        stage_outputs = {}  # stage_name -> {binding_name -> cache_id}
        # Execute stage by stage level
        for level in range(max_level + 1):
            level_stages = stages_by_level.get(level, [])
            if not level_stages:
                continue
            logger.info(f"Stage level {level}: {len(level_stages)} stages")
            # Check stage cache for each stage
            stages_to_run = []
            for stage_plan in level_stages:
                if self._is_stage_cached(stage_plan.cache_id):
                    result.stages_cached += 1
                    cached_entry = self._load_cached_stage(stage_plan.cache_id)
                    # NOTE(review): when the cache reports a hit but the
                    # entry fails to load, the stage is neither recorded
                    # nor re-run — confirm this gap is acceptable.
                    if cached_entry:
                        stage_outputs[stage_plan.stage_name] = {
                            name: out.cache_id
                            for name, out in cached_entry.outputs.items()
                        }
                        result.stage_results[stage_plan.stage_name] = StageResult(
                            stage_name=stage_plan.stage_name,
                            cache_id=stage_plan.cache_id,
                            status="cached",
                            outputs=stage_outputs[stage_plan.stage_name],
                        )
                        logger.info(f"Stage {stage_plan.stage_name}: cached")
                else:
                    stages_to_run.append(stage_plan)
            if not stages_to_run:
                logger.info(f"Stage level {level}: all {len(level_stages)} stages cached")
                continue
            # Execute uncached stages
            # For now, execute sequentially; L1 Celery will add parallel execution
            for stage_plan in stages_to_run:
                logger.info(f"Executing stage: {stage_plan.stage_name}")
                stage_result = self._execute_stage(
                    stage_plan,
                    plan,
                    stage_outputs,
                    timeout,
                )
                result.stage_results[stage_plan.stage_name] = stage_result
                if stage_result.status == "completed":
                    result.stages_completed += 1
                    stage_outputs[stage_plan.stage_name] = stage_result.outputs
                    # Cache the stage result for future runs
                    self._cache_stage(stage_plan, stage_result)
                elif stage_result.status == "failed":
                    # Abort the whole plan on the first failed stage
                    result.stages_failed += 1
                    result.status = "failed"
                    result.error = stage_result.error
                    return result
                # Accumulate step counts into the plan-level totals
                for sr in stage_result.step_results.values():
                    if sr.status == "completed":
                        result.steps_completed += 1
                    elif sr.status == "cached":
                        result.steps_cached += 1
                    elif sr.status == "failed":
                        result.steps_failed += 1
        # Get final output from the last stage in the plan
        if plan.stage_plans:
            last_stage = plan.stage_plans[-1]
            if last_stage.stage_name in result.stage_results:
                stage_res = result.stage_results[last_stage.stage_name]
                result.output_cache_id = last_stage.cache_id
                # Find the output step's path from step results
                for step_res in stage_res.step_results.values():
                    if step_res.output_path:
                        result.output_path = step_res.output_path
        result.status = "completed"
        logger.info(
            f"Plan {plan.plan_id[:16]}... completed: "
            f"{result.stages_completed} stages executed, "
            f"{result.stages_cached} stages cached"
        )
        return result

    def _group_stages_by_level(self, stage_plans: List) -> Dict[int, List]:
        """Group stage plans by their level."""
        by_level = {}
        for stage_plan in stage_plans:
            by_level.setdefault(stage_plan.level, []).append(stage_plan)
        return by_level

    def _is_stage_cached(self, cache_id: str) -> bool:
        """Check if a stage is cached."""
        if self.stage_cache is None:
            return False
        return self.stage_cache.has_stage(cache_id)

    def _load_cached_stage(self, cache_id: str):
        """Load a cached stage entry (None if no stage cache configured)."""
        if self.stage_cache is None:
            return None
        return self.stage_cache.load_stage(cache_id)

    def _cache_stage(self, stage_plan, stage_result: StageResult) -> None:
        """Cache a stage result (no-op if no stage cache configured)."""
        if self.stage_cache is None:
            return
        from .stage_cache import StageCacheEntry, StageOutput
        outputs = {}
        for name, cache_id in stage_result.outputs.items():
            outputs[name] = StageOutput(
                cache_id=cache_id,
                output_type="artifact",
            )
        entry = StageCacheEntry(
            stage_name=stage_plan.stage_name,
            cache_id=stage_plan.cache_id,
            outputs=outputs,
        )
        self.stage_cache.save_stage(entry)

    def _execute_stage(
        self,
        stage_plan,
        plan: ExecutionPlanSexp,
        stage_outputs: Dict,
        timeout: int,
    ) -> StageResult:
        """
        Execute a single stage.

        Uses the PlanScheduler to execute the stage's steps.
        """
        # Create a mini-plan with just this stage's steps
        stage_steps = stage_plan.steps
        # Build step lookup
        steps_by_id = {s.step_id: s for s in stage_steps}
        steps_by_level = {}
        for step in stage_steps:
            steps_by_level.setdefault(step.level, []).append(step)
        max_level = max(steps_by_level.keys()) if steps_by_level else 0
        # Track step results
        step_results = {}
        cache_ids = dict(plan.inputs)  # Start with input hashes
        for step in plan.steps:
            cache_ids[step.step_id] = step.cache_id
        # Include outputs from previous stages so bindings resolve
        for stage_name, outputs in stage_outputs.items():
            for binding_name, binding_cache_id in outputs.items():
                cache_ids[binding_name] = binding_cache_id
        # Execute steps level by level
        for level in range(max_level + 1):
            level_steps = steps_by_level.get(level, [])
            if not level_steps:
                continue
            # Check cache for each step
            steps_to_run = []
            for step in level_steps:
                if self._is_step_cached(step.cache_id):
                    step_results[step.step_id] = StepResult(
                        step_id=step.step_id,
                        cache_id=step.cache_id,
                        status="cached",
                        output_path=self._get_cached_path(step.cache_id),
                    )
                else:
                    steps_to_run.append(step)
            if not steps_to_run:
                continue
            # Execute steps (for now, sequentially - L1 will add Celery dispatch)
            for step in steps_to_run:
                # In a full implementation, this would dispatch to Celery
                # For now, mark as pending
                step_results[step.step_id] = StepResult(
                    step_id=step.step_id,
                    cache_id=step.cache_id,
                    status="pending",
                )
                # If Celery is configured, dispatch the task
                if self.celery_app:
                    try:
                        task_result = self._dispatch_step(step, cache_ids, timeout)
                        step_results[step.step_id] = StepResult(
                            step_id=step.step_id,
                            cache_id=step.cache_id,
                            status=task_result.get("status", "completed"),
                            output_path=task_result.get("output_path"),
                            error=task_result.get("error"),
                            ipfs_cid=task_result.get("ipfs_cid"),
                        )
                    except Exception as e:
                        # Record the failure and abort the stage immediately
                        step_results[step.step_id] = StepResult(
                            step_id=step.step_id,
                            cache_id=step.cache_id,
                            status="failed",
                            error=str(e),
                        )
                        return StageResult(
                            stage_name=stage_plan.stage_name,
                            cache_id=stage_plan.cache_id,
                            status="failed",
                            step_results=step_results,
                            error=str(e),
                        )
        # Build output bindings (fall back to the raw node_id if unresolved)
        outputs = {}
        for out_name, node_id in stage_plan.output_bindings.items():
            outputs[out_name] = cache_ids.get(node_id, node_id)
        return StageResult(
            stage_name=stage_plan.stage_name,
            cache_id=stage_plan.cache_id,
            status="completed",
            step_results=step_results,
            outputs=outputs,
        )

    def _is_step_cached(self, cache_id: str) -> bool:
        """Check if a step is cached."""
        if self.cache_manager is None:
            return False
        path = self.cache_manager.get_by_cid(cache_id)
        return path is not None

    def _get_cached_path(self, cache_id: str) -> Optional[str]:
        """Get the path for a cached step."""
        if self.cache_manager is None:
            return None
        path = self.cache_manager.get_by_cid(cache_id)
        return str(path) if path else None

    def _dispatch_step(self, step, cache_ids: Dict, timeout: int) -> Dict:
        """Dispatch a step to Celery and wait for its result.

        Raises:
            RuntimeError: If no Celery app was configured.
        """
        if self.celery_app is None:
            raise RuntimeError("Celery app not configured")
        task = self.celery_app.tasks[self.execute_task_name]
        step_sexp = step_sexp_to_string(step)
        input_cache_ids = {
            inp: cache_ids.get(inp, inp)
            for inp in step.inputs
        }
        async_result = task.apply_async(
            kwargs={
                "step_sexp": step_sexp,
                "step_id": step.step_id,
                "cache_id": step.cache_id,
                "input_cache_ids": input_cache_ids,
            }
        )
        return async_result.get(timeout=timeout)
def create_stage_scheduler(
    cache_manager=None,
    stage_cache=None,
    celery_app=None,
) -> StagePlanScheduler:
    """Build a StagePlanScheduler, resolving missing dependencies when possible.

    Any dependency left as None is looked up from the ambient application
    modules; import failures are swallowed so the scheduler simply runs
    without that capability.

    Args:
        cache_manager: L1 cache manager for step-level caching
        stage_cache: StageCache instance for stage-level caching
        celery_app: Celery application instance

    Returns:
        StagePlanScheduler
    """
    if cache_manager is None:
        try:
            from cache_manager import get_cache_manager
            cache_manager = get_cache_manager()
        except ImportError:
            pass
    if celery_app is None:
        try:
            from celery_app import app as celery_app
        except ImportError:
            pass
    return StagePlanScheduler(
        cache_manager=cache_manager,
        stage_cache=stage_cache,
        celery_app=celery_app,
    )
def schedule_staged_plan(
    plan: ExecutionPlanSexp,
    cache_manager=None,
    stage_cache=None,
    celery_app=None,
    timeout: int = 3600,
) -> StagePlanResult:
    """One-shot helper: build a stage-aware scheduler and run *plan* through it.

    Args:
        plan: The execution plan
        cache_manager: Optional step-level cache manager
        stage_cache: Optional stage-level cache
        celery_app: Optional Celery app
        timeout: Execution timeout

    Returns:
        StagePlanResult
    """
    scheduler = create_stage_scheduler(
        cache_manager,
        stage_cache,
        celery_app,
    )
    return scheduler.schedule(plan, timeout=timeout)

View File

@@ -0,0 +1,412 @@
"""
Stage-level cache layer using S-expression storage.
Provides caching for stage outputs, enabling:
- Stage-level cache hits (skip entire stage execution)
- Analysis result persistence as sexp
- Cross-worker stage cache sharing (for L1 Celery integration)
All cache files use .sexp extension - no JSON in the pipeline.
"""
import os
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from .parser import Symbol, Keyword, parse, serialize
@dataclass
class StageOutput:
    """A single named output produced by a stage."""

    cache_id: Optional[str] = None  # set for artifacts (files, analysis data)
    value: Any = None               # set for scalar outputs
    output_type: str = "artifact"   # "artifact", "analysis", or "scalar"

    def to_sexp(self) -> List:
        """Render this output as a flat keyword/value S-expression list."""
        rendered: List = []
        if self.cache_id:
            rendered += [Keyword("cache-id"), self.cache_id]
        if self.value is not None:
            rendered += [Keyword("value"), self.value]
        rendered += [Keyword("type"), Keyword(self.output_type)]
        return rendered

    @classmethod
    def from_sexp(cls, sexp: List) -> 'StageOutput':
        """Rebuild a StageOutput from a keyword/value S-expression list."""
        fields = {"cache_id": None, "value": None, "output_type": "artifact"}
        idx = 0
        while idx < len(sexp):
            token = sexp[idx]
            # Only a keyword followed by a value forms a usable pair; any
            # stray item (or trailing keyword) is skipped one token at a time.
            if not isinstance(token, Keyword) or idx + 1 >= len(sexp):
                idx += 1
                continue
            arg = sexp[idx + 1]
            if token.name == "cache-id":
                fields["cache_id"] = arg
            elif token.name == "value":
                fields["value"] = arg
            elif token.name == "type":
                fields["output_type"] = arg.name if isinstance(arg, Keyword) else str(arg)
            idx += 2
        return cls(**fields)
@dataclass
class StageCacheEntry:
    """Persisted result of one stage execution."""

    stage_name: str
    cache_id: str
    outputs: Dict[str, StageOutput]  # binding_name -> output descriptor
    completed_at: float = field(default_factory=time.time)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_sexp(self) -> List:
        """
        Render as a (stage-result ...) S-expression for storage.

        Format:
            (stage-result
              :name "analyze-a"
              :cache-id "abc123..."
              :completed-at 1705678900.123
              :outputs
              ((beats-a :cache-id "def456..." :type :analysis)
               (tempo :value 120.5 :type :scalar)))
        """
        rendered: List = [
            Symbol("stage-result"),
            Keyword("name"), self.stage_name,
            Keyword("cache-id"), self.cache_id,
            Keyword("completed-at"), self.completed_at,
        ]
        if self.outputs:
            rendered.append(Keyword("outputs"))
            rendered.append(
                [[Symbol(name)] + output.to_sexp()
                 for name, output in self.outputs.items()]
            )
        if self.metadata:
            rendered.append(Keyword("metadata"))
            rendered.append(self.metadata)
        return rendered

    def to_string(self, pretty: bool = True) -> str:
        """Serialize to an S-expression string."""
        return serialize(self.to_sexp(), pretty=pretty)

    @classmethod
    def from_sexp(cls, sexp: List) -> 'StageCacheEntry':
        """Parse a (stage-result ...) S-expression back into an entry.

        Raises:
            ValueError: on a wrong head symbol or when the required
                name/cache-id fields are missing.
        """
        if not sexp or not isinstance(sexp[0], Symbol) or sexp[0].name != "stage-result":
            raise ValueError("Invalid stage-result sexp")
        stage_name = None
        cache_id = None
        completed_at = time.time()
        outputs: Dict[str, StageOutput] = {}
        metadata: Dict[str, Any] = {}
        idx = 1
        while idx < len(sexp):
            token = sexp[idx]
            # Walk keyword/value pairs; stray tokens advance by one.
            if not isinstance(token, Keyword) or idx + 1 >= len(sexp):
                idx += 1
                continue
            arg = sexp[idx + 1]
            if token.name == "name":
                stage_name = arg
            elif token.name == "cache-id":
                cache_id = arg
            elif token.name == "completed-at":
                completed_at = float(arg)
            elif token.name == "outputs":
                if isinstance(arg, list):
                    for item in arg:
                        # Each entry is (symbol :kw val ...); the symbol is
                        # the binding name, the tail describes the output.
                        if isinstance(item, list) and item:
                            label = item[0]
                            if isinstance(label, Symbol):
                                label = label.name
                            outputs[label] = StageOutput.from_sexp(item[1:])
            elif token.name == "metadata":
                metadata = arg if isinstance(arg, dict) else {}
            idx += 2
        if not stage_name or not cache_id:
            raise ValueError("stage-result missing required fields (name, cache-id)")
        return cls(
            stage_name=stage_name,
            cache_id=cache_id,
            outputs=outputs,
            completed_at=completed_at,
            metadata=metadata,
        )

    @classmethod
    def from_string(cls, text: str) -> 'StageCacheEntry':
        """Parse an entry from S-expression text."""
        return cls.from_sexp(parse(text))
class StageCache:
    """
    Stage-level cache manager backed by S-expression files.

    Layout:
        cache_dir/
          _stages/
            {cache_id}.sexp   <- one file per cached stage result
    """

    def __init__(self, cache_dir: Union[str, Path]):
        """Create the cache rooted at *cache_dir*; the _stages dir is made on demand."""
        self.cache_dir = Path(cache_dir)
        self.stages_dir = self.cache_dir / "_stages"
        self.stages_dir.mkdir(parents=True, exist_ok=True)

    def get_cache_path(self, cache_id: str) -> Path:
        """Return the file path that stores the given stage result."""
        return self.stages_dir / f"{cache_id}.sexp"

    def has_stage(self, cache_id: str) -> bool:
        """True when a result file exists for *cache_id*."""
        return self.get_cache_path(cache_id).exists()

    def load_stage(self, cache_id: str) -> Optional[StageCacheEntry]:
        """
        Load a cached stage result.

        Args:
            cache_id: Stage cache ID

        Returns:
            StageCacheEntry if found and parseable, None otherwise.
        """
        path = self.get_cache_path(cache_id)
        if not path.exists():
            return None
        try:
            return StageCacheEntry.from_string(path.read_text())
        except Exception as e:
            # Corrupted cache file - warn and treat as a cache miss.
            import sys
            print(f"Warning: corrupted stage cache {cache_id}: {e}", file=sys.stderr)
            return None

    def save_stage(self, entry: StageCacheEntry) -> Path:
        """
        Write *entry* to its cache file.

        Args:
            entry: Stage cache entry to save

        Returns:
            Path to the saved cache file
        """
        target = self.get_cache_path(entry.cache_id)
        target.write_text(entry.to_string(pretty=True))
        return target

    def delete_stage(self, cache_id: str) -> bool:
        """
        Remove a cached stage result.

        Args:
            cache_id: Stage cache ID

        Returns:
            True if a file was deleted, False if not found.
        """
        target = self.get_cache_path(cache_id)
        if not target.exists():
            return False
        target.unlink()
        return True

    def list_stages(self) -> List[str]:
        """Return the cache ids of every stored stage result."""
        return [entry.stem for entry in self.stages_dir.glob("*.sexp")]

    def clear(self) -> int:
        """
        Delete every cached stage result.

        Returns:
            Number of entries removed.
        """
        removed = 0
        for entry in self.stages_dir.glob("*.sexp"):
            entry.unlink()
            removed += 1
        return removed
@dataclass
class AnalysisResult:
    """
    Analysis result stored as S-expression.

    Format:
        (analysis-result
          :analyzer "beats"
          :input-hash "abc123..."
          :duration 120.5
          :tempo 128.0
          :times (0.0 0.468 0.937 1.406 ...)
          :values (0.8 0.9 0.7 0.85 ...))
    """

    analyzer: str
    input_hash: str
    data: Dict[str, Any]  # analysis payload (times, values, duration, etc.)
    computed_at: float = field(default_factory=time.time)

    def to_sexp(self) -> List:
        """Render as an (analysis-result ...) sexp; data keys become kebab-case keywords."""
        rendered: List = [
            Symbol("analysis-result"),
            Keyword("analyzer"), self.analyzer,
            Keyword("input-hash"), self.input_hash,
            Keyword("computed-at"), self.computed_at,
        ]
        for key, value in self.data.items():
            rendered += [Keyword(key.replace("_", "-")), value]
        return rendered

    def to_string(self, pretty: bool = True) -> str:
        """Serialize to an S-expression string."""
        return serialize(self.to_sexp(), pretty=pretty)

    @classmethod
    def from_sexp(cls, sexp: List) -> 'AnalysisResult':
        """Parse an (analysis-result ...) S-expression.

        Raises:
            ValueError: on a wrong head symbol or a missing analyzer field.
        """
        if not sexp or not isinstance(sexp[0], Symbol) or sexp[0].name != "analysis-result":
            raise ValueError("Invalid analysis-result sexp")
        analyzer = None
        input_hash = None
        computed_at = time.time()
        data: Dict[str, Any] = {}
        idx = 1
        while idx < len(sexp):
            token = sexp[idx]
            if not isinstance(token, Keyword) or idx + 1 >= len(sexp):
                idx += 1
                continue
            arg = sexp[idx + 1]
            if token.name == "analyzer":
                analyzer = arg
            elif token.name == "input-hash":
                input_hash = arg
            elif token.name == "computed-at":
                computed_at = float(arg)
            else:
                # Every other keyword is payload; kebab-case back to snake_case.
                data[token.name.replace("-", "_")] = arg
            idx += 2
        if not analyzer:
            raise ValueError("analysis-result missing analyzer field")
        return cls(
            analyzer=analyzer,
            input_hash=input_hash or "",
            data=data,
            computed_at=computed_at,
        )

    @classmethod
    def from_string(cls, text: str) -> 'AnalysisResult':
        """Parse an analysis result from S-expression text."""
        return cls.from_sexp(parse(text))
def save_analysis_result(
    cache_dir: Union[str, Path],
    node_id: str,
    result: AnalysisResult,
) -> Path:
    """
    Persist *result* as S-expression text under the node's cache directory.

    Args:
        cache_dir: Base cache directory
        node_id: Node ID for the analysis
        result: Analysis result to save

    Returns:
        Path to the saved file ({cache_dir}/{node_id}/analysis.sexp)
    """
    target_dir = Path(cache_dir) / node_id
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / "analysis.sexp"
    target.write_text(result.to_string(pretty=True))
    return target
def load_analysis_result(
    cache_dir: Union[str, Path],
    node_id: str,
) -> Optional[AnalysisResult]:
    """
    Load a previously saved analysis result.

    Args:
        cache_dir: Base cache directory
        node_id: Node ID for the analysis

    Returns:
        AnalysisResult if present and parseable, None otherwise.
    """
    target = Path(cache_dir) / node_id / "analysis.sexp"
    if not target.exists():
        return None
    try:
        return AnalysisResult.from_string(target.read_text())
    except Exception as e:
        # Corrupted or unreadable cache entry - warn and treat as a miss.
        import sys
        print(f"Warning: corrupted analysis cache {node_id}: {e}", file=sys.stderr)
        return None

View File

@@ -0,0 +1,146 @@
"""
Tests for FFmpeg filter compilation.
Validates that each filter mapping produces valid FFmpeg commands.
"""
import subprocess
import tempfile
from pathlib import Path
from .ffmpeg_compiler import FFmpegCompiler, EFFECT_MAPPINGS
def test_filter_syntax(filter_str: str, duration: float = 0.1, is_complex: bool = False) -> tuple[bool, str]:
    """
    Validate an FFmpeg filter string by applying it to a generated test pattern.

    Args:
        filter_str: The filter string to test
        duration: Duration of test video
        is_complex: If True, use -filter_complex instead of -vf

    Returns (success, error_message)
    """
    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as f:
        output_path = f.name
    # Shared invocation pieces: synthetic video + audio inputs and encode args.
    head = [
        'ffmpeg', '-y',
        '-f', 'lavfi', '-i', f'testsrc=duration={duration}:size=64x64:rate=10',
        '-f', 'lavfi', '-i', f'sine=frequency=440:duration={duration}',
    ]
    tail = [
        '-c:v', 'libx264', '-preset', 'ultrafast',
        '-c:a', 'aac',
        '-t', str(duration),
        output_path,
    ]
    if is_complex:
        # Complex filter graph needs -filter_complex and explicit output mapping
        filter_args = [
            '-filter_complex', f'[0:v]{filter_str}[out]',
            '-map', '[out]', '-map', '1:a',
        ]
    else:
        # Simple filter uses -vf
        filter_args = ['-vf', filter_str]
    try:
        result = subprocess.run(head + filter_args + tail,
                                capture_output=True, text=True, timeout=10)
        if result.returncode == 0:
            return True, ""
        # Surface the most relevant error line from stderr, if any.
        stderr = result.stderr
        for line in stderr.split('\n'):
            if 'Error' in line or 'error' in line or 'Invalid' in line:
                return False, line.strip()
        return False, stderr[-500:] if len(stderr) > 500 else stderr
    except subprocess.TimeoutExpired:
        return False, "Timeout"
    except Exception as e:
        return False, str(e)
    finally:
        Path(output_path).unlink(missing_ok=True)
def run_all_tests():
    """Exercise every EFFECT_MAPPINGS entry against a real ffmpeg invocation.

    Returns:
        List of (effect_name, status, message) tuples where status is one of
        "PASS", "FAIL", or "SKIP" (no FFmpeg equivalent).
    """
    # NOTE: the FFmpegCompiler instance previously created here was never
    # used; the mappings are consulted directly.
    results = []
    for effect_name, mapping in EFFECT_MAPPINGS.items():
        filter_name = mapping.get("filter")
        # Skip effects with no FFmpeg equivalent (external tools or python primitives)
        if filter_name is None:
            reason = "No FFmpeg equivalent"
            if mapping.get("external_tool"):
                reason = f"External tool: {mapping['external_tool']}"
            elif mapping.get("python_primitive"):
                reason = f"Python primitive: {mapping['python_primitive']}"
            results.append((effect_name, "SKIP", reason))
            continue
        # Check if complex filter
        is_complex = mapping.get("complex", False)
        # Build filter string; "static" args are baked into the mapping.
        if "static" in mapping:
            filter_str = f"{filter_name}={mapping['static']}"
        else:
            filter_str = filter_name
        # Test it
        success, error = test_filter_syntax(filter_str, is_complex=is_complex)
        if success:
            results.append((effect_name, "PASS", filter_str))
        else:
            results.append((effect_name, "FAIL", f"{filter_str} -> {error}"))
    return results
def print_results(results):
    """Pretty-print (name, status, message) tuples produced by run_all_tests."""
    passed = sum(status == "PASS" for _, status, _ in results)
    failed = sum(status == "FAIL" for _, status, _ in results)
    skipped = sum(status == "SKIP" for _, status, _ in results)
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"FFmpeg Filter Tests: {passed} passed, {failed} failed, {skipped} skipped")
    print(f"{banner}\n")
    # Failures are the most actionable, so they come first.
    if failed > 0:
        print("FAILURES:")
        for name, status, msg in results:
            if status == "FAIL":
                print(f"  {name}: {msg}")
        print()
    print("PASSED:")
    for name, status, msg in results:
        if status == "PASS":
            print(f"  {name}: {msg}")
    if skipped > 0:
        print("\nSKIPPED (Python fallback):")
        for name, status, msg in results:
            if status == "SKIP":
                print(f"  {name}")
# Run the full filter validation suite when this module is executed directly.
if __name__ == "__main__":
    results = run_all_tests()
    print_results(results)

View File

@@ -0,0 +1,201 @@
"""
Tests for Python primitive effects.
Tests that ascii_art, ascii_zones, and other Python primitives
can be executed via the EffectExecutor.
"""
import subprocess
import tempfile
from pathlib import Path
import pytest
try:
import numpy as np
from PIL import Image
HAS_DEPS = True
except ImportError:
HAS_DEPS = False
from .primitives import (
ascii_art_frame,
ascii_zones_frame,
get_primitive,
list_primitives,
)
from .ffmpeg_compiler import FFmpegCompiler
def create_test_video(path: Path, duration: float = 0.5, size: str = "64x64") -> bool:
    """Render a short synthetic test-pattern video to *path*; True on success."""
    command = [
        "ffmpeg", "-y",
        "-f", "lavfi", "-i", f"testsrc=duration={duration}:size={size}:rate=10",
        "-c:v", "libx264", "-preset", "ultrafast",
        str(path)
    ]
    # ffmpeg's exit status is the only success signal we need here.
    return subprocess.run(command, capture_output=True).returncode == 0
@pytest.mark.skipif(not HAS_DEPS, reason="numpy/PIL not available")
class TestPrimitives:
    """Direct tests of the frame-level primitive functions."""

    @staticmethod
    def _random_frame():
        # Common fixture: a 64x64 RGB frame of random bytes.
        return np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)

    def test_ascii_art_frame_basic(self):
        """ascii_art_frame preserves the input's shape and dtype."""
        frame = self._random_frame()
        out = ascii_art_frame(frame, char_size=8)
        assert out.shape == frame.shape
        assert out.dtype == np.uint8

    def test_ascii_zones_frame_basic(self):
        """ascii_zones_frame preserves the input's shape and dtype."""
        frame = self._random_frame()
        out = ascii_zones_frame(frame, char_size=8)
        assert out.shape == frame.shape
        assert out.dtype == np.uint8

    def test_get_primitive(self):
        """get_primitive resolves known names and returns None for unknowns."""
        assert get_primitive("ascii_art_frame") is ascii_art_frame
        assert get_primitive("ascii_zones_frame") is ascii_zones_frame
        assert get_primitive("nonexistent") is None

    def test_list_primitives(self):
        """list_primitives includes the ascii primitives and is non-trivial."""
        names = list_primitives()
        assert "ascii_art_frame" in names
        assert "ascii_zones_frame" in names
        assert len(names) > 5
class TestFFmpegCompilerPrimitives:
    """FFmpegCompiler must route primitive effects away from filter compilation."""

    def setup_method(self):
        # A fresh compiler per test keeps the cases independent.
        self.compiler = FFmpegCompiler()

    def test_has_python_primitive_ascii_art(self):
        """ascii_art maps to the ascii_art_frame primitive."""
        assert self.compiler.has_python_primitive("ascii_art") == "ascii_art_frame"

    def test_has_python_primitive_ascii_zones(self):
        """ascii_zones maps to the ascii_zones_frame primitive."""
        assert self.compiler.has_python_primitive("ascii_zones") == "ascii_zones_frame"

    def test_has_python_primitive_ffmpeg_effect(self):
        """Pure-FFmpeg effects report no python primitive."""
        assert self.compiler.has_python_primitive("brightness") is None
        assert self.compiler.has_python_primitive("blur") is None

    def test_compile_effect_returns_none_for_primitives(self):
        """compile_effect declines primitive effects (they bypass FFmpeg)."""
        assert self.compiler.compile_effect("ascii_art", {}) is None
        assert self.compiler.compile_effect("ascii_zones", {}) is None
@pytest.mark.skipif(not HAS_DEPS, reason="numpy/PIL not available")
class TestEffectExecutorPrimitives:
    """EffectExecutor integration with Python primitive effects."""

    def test_executor_loads_primitive(self):
        """Known primitive effect names resolve to callables."""
        from ..nodes.effect import _get_python_primitive_effect
        for effect_name in ("ascii_art", "ascii_zones"):
            assert _get_python_primitive_effect(effect_name) is not None

    def test_executor_rejects_unknown_effect(self):
        """Unknown effect names resolve to None."""
        from ..nodes.effect import _get_python_primitive_effect
        assert _get_python_primitive_effect("nonexistent_effect") is None

    def test_execute_ascii_art_effect(self, tmp_path):
        """ascii_art runs end-to-end on a real (tiny) video file."""
        from ..nodes.effect import EffectExecutor
        # Create test video
        source = tmp_path / "input.mp4"
        if not create_test_video(source):
            pytest.skip("Could not create test video")
        target = tmp_path / "output.mkv"
        produced = EffectExecutor().execute(
            config={"effect": "ascii_art", "char_size": 8},
            inputs=[source],
            output_path=target,
        )
        assert produced.exists()
        assert produced.stat().st_size > 0
def run_all_tests():
    """Manual (non-pytest) smoke test of the primitives and their wiring.

    Prints PASS/SKIP progress to stdout; raises AssertionError on failure.
    """
    # The previous version imported `sys` here but never used it - removed.
    # Check dependencies
    if not HAS_DEPS:
        print("SKIP: numpy/PIL not available")
        return
    print("Testing primitives...")
    # Test primitive functions
    frame = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
    print("  ascii_art_frame...", end=" ")
    result = ascii_art_frame(frame, char_size=8)
    assert result.shape == frame.shape
    print("PASS")
    print("  ascii_zones_frame...", end=" ")
    result = ascii_zones_frame(frame, char_size=8)
    assert result.shape == frame.shape
    print("PASS")
    # Test FFmpegCompiler mappings
    print("\nTesting FFmpegCompiler mappings...")
    compiler = FFmpegCompiler()
    print("  ascii_art python_primitive...", end=" ")
    assert compiler.has_python_primitive("ascii_art") == "ascii_art_frame"
    print("PASS")
    print("  ascii_zones python_primitive...", end=" ")
    assert compiler.has_python_primitive("ascii_zones") == "ascii_zones_frame"
    print("PASS")
    # Test executor lookup
    print("\nTesting EffectExecutor...")
    try:
        from ..nodes.effect import _get_python_primitive_effect
        print("  _get_python_primitive_effect(ascii_art)...", end=" ")
        effect_fn = _get_python_primitive_effect("ascii_art")
        assert effect_fn is not None
        print("PASS")
        print("  _get_python_primitive_effect(ascii_zones)...", end=" ")
        effect_fn = _get_python_primitive_effect("ascii_zones")
        assert effect_fn is not None
        print("PASS")
    except ImportError as e:
        print(f"SKIP: {e}")
    print("\n=== All tests passed ===")
# Allow running this module directly for a dependency-aware smoke test.
if __name__ == "__main__":
    run_all_tests()

View File

@@ -0,0 +1,324 @@
"""
Tests for stage cache layer.
Tests S-expression storage for stage results and analysis data.
"""
import pytest
import tempfile
from pathlib import Path
from .stage_cache import (
StageCache,
StageCacheEntry,
StageOutput,
AnalysisResult,
save_analysis_result,
load_analysis_result,
)
from .parser import parse, serialize
class TestStageOutput:
    """StageOutput construction and sexp (de)serialization."""

    def test_stage_output_artifact(self):
        """StageOutput can represent an artifact."""
        artifact = StageOutput(cache_id="abc123", output_type="artifact")
        assert artifact.cache_id == "abc123"
        assert artifact.output_type == "artifact"

    def test_stage_output_scalar(self):
        """StageOutput can represent a scalar value."""
        scalar = StageOutput(value=120.5, output_type="scalar")
        assert scalar.value == 120.5
        assert scalar.output_type == "scalar"

    def test_stage_output_to_sexp(self):
        """Serialized form carries the cache id and type markers."""
        artifact = StageOutput(cache_id="abc123", output_type="artifact")
        rendered = serialize(artifact.to_sexp())
        for fragment in ("cache-id", "abc123", "type", "artifact"):
            assert fragment in rendered

    def test_stage_output_from_sexp(self):
        """Keyword/value lists parse back into a StageOutput."""
        parsed = StageOutput.from_sexp(parse('(:cache-id "def456" :type :analysis)'))
        assert parsed.cache_id == "def456"
        assert parsed.output_type == "analysis"
class TestStageCacheEntry:
    """StageCacheEntry sexp round-tripping."""

    def test_stage_cache_entry_to_sexp(self):
        """StageCacheEntry serializes to sexp."""
        entry = StageCacheEntry(
            stage_name="analyze-a",
            cache_id="stage_abc123",
            outputs={
                "beats": StageOutput(cache_id="beats_def456", output_type="analysis"),
                "tempo": StageOutput(value=120.5, output_type="scalar"),
            },
            completed_at=1705678900.123,
        )
        rendered = serialize(entry.to_sexp())
        for fragment in ("stage-result", "analyze-a", "stage_abc123", "outputs", "beats"):
            assert fragment in rendered

    def test_stage_cache_entry_roundtrip(self):
        """save -> load produces identical data."""
        original = StageCacheEntry(
            stage_name="analyze-b",
            cache_id="stage_xyz789",
            outputs={
                "segments": StageOutput(cache_id="seg_123", output_type="artifact"),
            },
            completed_at=1705678900.0,
        )
        restored = StageCacheEntry.from_string(original.to_string())
        assert restored.stage_name == original.stage_name
        assert restored.cache_id == original.cache_id
        assert "segments" in restored.outputs
        assert restored.outputs["segments"].cache_id == "seg_123"

    def test_stage_cache_entry_from_sexp(self):
        """A hand-written stage-result sexp parses correctly."""
        text = '''
        (stage-result
          :name "test-stage"
          :cache-id "cache123"
          :completed-at 1705678900.0
          :outputs ((beats :cache-id "beats123" :type :analysis)))
        '''
        entry = StageCacheEntry.from_string(text)
        assert entry.stage_name == "test-stage"
        assert entry.cache_id == "cache123"
        assert "beats" in entry.outputs
        assert entry.outputs["beats"].cache_id == "beats123"
class TestStageCache:
    """Test StageCache file operations."""

    def test_save_and_load_stage(self):
        """Save and load a stage result."""
        with tempfile.TemporaryDirectory() as tmpdir:
            cache = StageCache(tmpdir)
            entry = StageCacheEntry(
                stage_name="analyze",
                cache_id="test_cache_id",
                outputs={
                    "beats": StageOutput(cache_id="beats_out", output_type="analysis"),
                },
            )
            # save_stage writes <tmpdir>/_stages/<cache_id>.sexp and returns the path
            path = cache.save_stage(entry)
            assert path.exists()
            assert path.suffix == ".sexp"
            loaded = cache.load_stage("test_cache_id")
            assert loaded is not None
            assert loaded.stage_name == "analyze"
            assert "beats" in loaded.outputs

    def test_has_stage(self):
        """Check if stage is cached."""
        with tempfile.TemporaryDirectory() as tmpdir:
            cache = StageCache(tmpdir)
            # Miss before save, hit after.
            assert not cache.has_stage("nonexistent")
            entry = StageCacheEntry(
                stage_name="test",
                cache_id="exists_cache_id",
                outputs={},
            )
            cache.save_stage(entry)
            assert cache.has_stage("exists_cache_id")

    def test_delete_stage(self):
        """Delete a cached stage."""
        with tempfile.TemporaryDirectory() as tmpdir:
            cache = StageCache(tmpdir)
            entry = StageCacheEntry(
                stage_name="test",
                cache_id="to_delete",
                outputs={},
            )
            cache.save_stage(entry)
            assert cache.has_stage("to_delete")
            # delete_stage reports True when a file was actually removed.
            result = cache.delete_stage("to_delete")
            assert result is True
            assert not cache.has_stage("to_delete")

    def test_list_stages(self):
        """List all cached stages."""
        with tempfile.TemporaryDirectory() as tmpdir:
            cache = StageCache(tmpdir)
            for i in range(3):
                entry = StageCacheEntry(
                    stage_name=f"stage{i}",
                    cache_id=f"cache_{i}",
                    outputs={},
                )
                cache.save_stage(entry)
            # list_stages returns the cache ids (file stems), order-independent.
            stages = cache.list_stages()
            assert len(stages) == 3
            assert "cache_0" in stages
            assert "cache_1" in stages
            assert "cache_2" in stages

    def test_clear(self):
        """Clear all cached stages."""
        with tempfile.TemporaryDirectory() as tmpdir:
            cache = StageCache(tmpdir)
            for i in range(3):
                entry = StageCacheEntry(
                    stage_name=f"stage{i}",
                    cache_id=f"cache_{i}",
                    outputs={},
                )
                cache.save_stage(entry)
            # clear() reports the number of removed entries.
            count = cache.clear()
            assert count == 3
            assert len(cache.list_stages()) == 0

    def test_cache_file_extension(self):
        """Cache files use .sexp extension."""
        with tempfile.TemporaryDirectory() as tmpdir:
            cache = StageCache(tmpdir)
            path = cache.get_cache_path("test_id")
            assert path.suffix == ".sexp"

    def test_invalid_sexp_error_handling(self):
        """Graceful error on corrupted cache file."""
        with tempfile.TemporaryDirectory() as tmpdir:
            cache = StageCache(tmpdir)
            # Write corrupted content
            corrupt_path = cache.get_cache_path("corrupted")
            corrupt_path.write_text("this is not valid sexp )()(")
            # Should return None, not raise
            result = cache.load_stage("corrupted")
            assert result is None
class TestAnalysisResult:
    """Test AnalysisResult serialization."""

    def test_analysis_result_to_sexp(self):
        """AnalysisResult serializes to sexp."""
        result = AnalysisResult(
            analyzer="beats",
            input_hash="input_abc123",
            data={
                "duration": 120.5,
                "tempo": 128.0,
                "times": [0.0, 0.468, 0.937, 1.406],
                "values": [0.8, 0.9, 0.7, 0.85],
            },
        )
        sexp = result.to_sexp()
        sexp_str = serialize(sexp)
        # Header fields plus every data key should appear in the rendered text.
        assert "analysis-result" in sexp_str
        assert "beats" in sexp_str
        assert "duration" in sexp_str
        assert "tempo" in sexp_str
        assert "times" in sexp_str

    def test_analysis_result_roundtrip(self):
        """Analysis result round-trips through sexp."""
        original = AnalysisResult(
            analyzer="scenes",
            input_hash="video_xyz",
            data={
                "scene_count": 5,
                "scene_times": [0.0, 10.5, 25.0, 45.2, 60.0],
            },
        )
        sexp_str = original.to_string()
        loaded = AnalysisResult.from_string(sexp_str)
        assert loaded.analyzer == original.analyzer
        assert loaded.input_hash == original.input_hash
        assert loaded.data["scene_count"] == 5

    def test_save_and_load_analysis_result(self):
        """Save and load analysis result from cache."""
        with tempfile.TemporaryDirectory() as tmpdir:
            result = AnalysisResult(
                analyzer="beats",
                input_hash="audio_123",
                data={
                    "tempo": 120.0,
                    "times": [0.0, 0.5, 1.0],
                },
            )
            # Files land at <tmpdir>/<node_id>/analysis.sexp
            path = save_analysis_result(tmpdir, "node_abc", result)
            assert path.exists()
            assert path.name == "analysis.sexp"
            loaded = load_analysis_result(tmpdir, "node_abc")
            assert loaded is not None
            assert loaded.analyzer == "beats"
            assert loaded.data["tempo"] == 120.0

    def test_analysis_result_kebab_case(self):
        """Keys convert between snake_case and kebab-case."""
        result = AnalysisResult(
            analyzer="test",
            input_hash="hash",
            data={
                "scene_count": 5,
                "beat_times": [1, 2, 3],
            },
        )
        sexp_str = result.to_string()
        # Kebab case in sexp
        assert "scene-count" in sexp_str
        assert "beat-times" in sexp_str
        # Back to snake_case after parsing
        loaded = AnalysisResult.from_string(sexp_str)
        assert "scene_count" in loaded.data
        assert "beat_times" in loaded.data

View File

@@ -0,0 +1,286 @@
"""
Tests for stage compilation and scoping.
Tests the CompiledStage dataclass, stage form parsing,
variable scoping, and dependency validation.
"""
import pytest
from .parser import parse, Symbol, Keyword
from .compiler import (
compile_recipe,
CompileError,
CompiledStage,
CompilerContext,
_topological_sort_stages,
)
class TestStageCompilation:
    """Test stage form compilation."""

    def test_parse_stage_form_basic(self):
        """Stage parses correctly with name and outputs."""
        recipe = '''
        (recipe "test-stage"
          (def audio (source :path "test.mp3"))
          (stage :analyze
            :outputs [beats]
            (def beats (-> audio (analyze beats)))
            (-> audio (segment :times beats) (sequence))))
        '''
        compiled = compile_recipe(parse(recipe))
        # One stage, carrying its declared output binding and at least one node.
        assert len(compiled.stages) == 1
        assert compiled.stages[0].name == "analyze"
        assert "beats" in compiled.stages[0].outputs
        assert len(compiled.stages[0].node_ids) > 0

    def test_parse_stage_with_requires(self):
        """Stage parses correctly with requires and inputs."""
        recipe = '''
        (recipe "test-requires"
          (def audio (source :path "test.mp3"))
          (stage :analyze
            :outputs [beats]
            (def beats (-> audio (analyze beats))))
          (stage :process
            :requires [:analyze]
            :inputs [beats]
            :outputs [segments]
            (def segments (-> audio (segment :times beats)))
            (-> segments (sequence))))
        '''
        compiled = compile_recipe(parse(recipe))
        assert len(compiled.stages) == 2
        # The dependent stage records both its requirement and its imports.
        process_stage = next(s for s in compiled.stages if s.name == "process")
        assert process_stage.requires == ["analyze"]
        assert "beats" in process_stage.inputs
        assert "segments" in process_stage.outputs

    def test_stage_outputs_recorded(self):
        """Stage outputs are tracked in CompiledStage."""
        recipe = '''
        (recipe "test-outputs"
          (def audio (source :path "test.mp3"))
          (stage :analyze
            :outputs [beats tempo]
            (def beats (-> audio (analyze beats)))
            (def tempo (-> audio (analyze tempo)))
            (-> audio (segment :times beats) (sequence))))
        '''
        compiled = compile_recipe(parse(recipe))
        stage = compiled.stages[0]
        # Both the declared names and their node bindings are recorded.
        assert "beats" in stage.outputs
        assert "tempo" in stage.outputs
        assert "beats" in stage.output_bindings
        assert "tempo" in stage.output_bindings

    def test_stage_order_topological(self):
        """Stages are topologically sorted."""
        recipe = '''
        (recipe "test-order"
          (def audio (source :path "test.mp3"))
          (stage :analyze
            :outputs [beats]
            (def beats (-> audio (analyze beats))))
          (stage :output
            :requires [:analyze]
            :inputs [beats]
            (-> audio (segment :times beats) (sequence))))
        '''
        compiled = compile_recipe(parse(recipe))
        # analyze should come before output
        assert compiled.stage_order.index("analyze") < compiled.stage_order.index("output")
class TestStageValidation:
    """Test stage dependency and input validation."""

    def test_stage_requires_validation(self):
        """Error if requiring non-existent stage."""
        recipe = '''
        (recipe "test-bad-require"
          (def audio (source :path "test.mp3"))
          (stage :process
            :requires [:nonexistent]
            :inputs [beats]
            (def result audio)))
        '''
        with pytest.raises(CompileError, match="requires undefined stage"):
            compile_recipe(parse(recipe))

    def test_stage_inputs_validation(self):
        """Error if input not produced by required stage."""
        recipe = '''
        (recipe "test-bad-input"
          (def audio (source :path "test.mp3"))
          (stage :analyze
            :outputs [beats]
            (def beats (-> audio (analyze beats))))
          (stage :process
            :requires [:analyze]
            :inputs [nonexistent]
            (def result audio)))
        '''
        # :inputs must be satisfiable from the :outputs of required stages.
        with pytest.raises(CompileError, match="not an output of any required stage"):
            compile_recipe(parse(recipe))

    def test_undeclared_output_error(self):
        """Error if stage declares output not defined in body."""
        recipe = '''
        (recipe "test-missing-output"
          (def audio (source :path "test.mp3"))
          (stage :analyze
            :outputs [beats nonexistent]
            (def beats (-> audio (analyze beats)))))
        '''
        with pytest.raises(CompileError, match="not defined in the stage body"):
            compile_recipe(parse(recipe))

    def test_forward_reference_detection(self):
        """Error when requiring a stage not yet defined."""
        # Forward references are not allowed - stages must be defined
        # before they can be required
        recipe = '''
        (recipe "test-forward"
          (def audio (source :path "test.mp3"))
          (stage :a
            :requires [:b]
            :outputs [out-a]
            (def out-a audio))
          (stage :b
            :outputs [out-b]
            (def out-b audio)
            audio))
        '''
        with pytest.raises(CompileError, match="requires undefined stage"):
            compile_recipe(parse(recipe))
class TestStageScoping:
    """Test variable scoping between stages.

    Verifies the two scoping rules: definitions made before any stage are
    globally visible, and stage-local bindings only cross stage boundaries
    via an explicit :requires / :inputs declaration.
    """

    def test_pre_stage_bindings_accessible(self):
        """Sources defined before stages accessible to all stages."""
        recipe = '''
(recipe "test-pre-stage"
(def audio (source :path "test.mp3"))
(def video (source :path "test.mp4"))
(stage :analyze-audio
:outputs [beats]
(def beats (-> audio (analyze beats))))
(stage :analyze-video
:outputs [scenes]
(def scenes (-> video (analyze scenes)))
(-> video (segment :times scenes) (sequence))))
'''
        # Should compile without error - audio and video accessible to both stages
        compiled = compile_recipe(parse(recipe))
        assert len(compiled.stages) == 2

    def test_stage_bindings_flow_through_requires(self):
        """Stage bindings accessible to dependent stages via :inputs."""
        recipe = '''
(recipe "test-binding-flow"
(def audio (source :path "test.mp3"))
(stage :analyze
:outputs [beats]
(def beats (-> audio (analyze beats))))
(stage :process
:requires [:analyze]
:inputs [beats]
:outputs [result]
(def result (-> audio (segment :times beats)))
(-> result (sequence))))
'''
        # Should compile without error - beats flows from analyze to process
        compiled = compile_recipe(parse(recipe))
        assert len(compiled.stages) == 2
class TestTopologicalSort:
    """Test stage topological sorting.

    Exercises _topological_sort_stages over empty, single, chained,
    parallel, and diamond-shaped stage graphs.
    """

    def test_empty_stages(self):
        """Empty stages returns empty list."""
        assert _topological_sort_stages({}) == []

    def test_single_stage(self):
        """Single stage returns single element."""
        only = CompiledStage(
            name="a",
            requires=[],
            inputs=[],
            outputs=["out"],
            node_ids=["n1"],
            output_bindings={"out": "n1"},
        )
        assert _topological_sort_stages({"a": only}) == ["a"]

    def test_linear_chain(self):
        """Linear chain sorted correctly."""
        graph = {
            "a": CompiledStage(name="a", requires=[], inputs=[], outputs=["x"],
                               node_ids=["n1"], output_bindings={"x": "n1"}),
            "b": CompiledStage(name="b", requires=["a"], inputs=["x"], outputs=["y"],
                               node_ids=["n2"], output_bindings={"y": "n2"}),
            "c": CompiledStage(name="c", requires=["b"], inputs=["y"], outputs=["z"],
                               node_ids=["n3"], output_bindings={"z": "n3"}),
        }
        order = _topological_sort_stages(graph)
        assert order.index("a") < order.index("b") < order.index("c")

    def test_parallel_stages_same_level(self):
        """Parallel stages are both valid orderings."""
        graph = {
            "a": CompiledStage(name="a", requires=[], inputs=[], outputs=["x"],
                               node_ids=["n1"], output_bindings={"x": "n1"}),
            "b": CompiledStage(name="b", requires=[], inputs=[], outputs=["y"],
                               node_ids=["n2"], output_bindings={"y": "n2"}),
        }
        # Both a and b should be in result (order doesn't matter)
        assert set(_topological_sort_stages(graph)) == {"a", "b"}

    def test_diamond_dependency(self):
        """Diamond pattern: A -> B, A -> C, B+C -> D."""
        graph = {
            "a": CompiledStage(name="a", requires=[], inputs=[], outputs=["x"],
                               node_ids=["n1"], output_bindings={"x": "n1"}),
            "b": CompiledStage(name="b", requires=["a"], inputs=["x"], outputs=["y"],
                               node_ids=["n2"], output_bindings={"y": "n2"}),
            "c": CompiledStage(name="c", requires=["a"], inputs=["x"], outputs=["z"],
                               node_ids=["n3"], output_bindings={"z": "n3"}),
            "d": CompiledStage(name="d", requires=["b", "c"], inputs=["y", "z"], outputs=["out"],
                               node_ids=["n4"], output_bindings={"out": "n4"}),
        }
        order = _topological_sort_stages(graph)
        # a must be first, d must be last
        assert order[0] == "a"
        assert order[-1] == "d"
        # b and c must be before d
        assert order.index("b") < order.index("d")
        assert order.index("c") < order.index("d")

View File

@@ -0,0 +1,739 @@
"""
End-to-end integration tests for staged recipes.
Tests the complete flow: compile -> plan -> execute
for recipes with stages.
"""
import pytest
import tempfile
from pathlib import Path
from .parser import parse, serialize
from .compiler import compile_recipe, CompileError
from .planner import ExecutionPlanSexp, StagePlan
from .stage_cache import StageCache, StageCacheEntry, StageOutput
from .scheduler import StagePlanScheduler, StagePlanResult
class TestSimpleTwoStageRecipe:
    """Test basic two-stage recipe flow."""

    def test_compile_two_stage_recipe(self):
        """Compile a simple two-stage recipe."""
        recipe = '''
(recipe "test-two-stages"
(def audio (source :path "test.mp3"))
(stage :analyze
:outputs [beats]
(def beats (-> audio (analyze beats))))
(stage :output
:requires [:analyze]
:inputs [beats]
(-> audio (segment :times beats) (sequence))))
'''
        result = compile_recipe(parse(recipe))
        assert len(result.stages) == 2
        assert result.stage_order == ["analyze", "output"]
        # Stage 0: analysis producing the "beats" binding.
        first, second = result.stages
        assert first.name == "analyze"
        assert "beats" in first.outputs
        # Stage 1: output stage consuming "beats" from :analyze.
        assert second.name == "output"
        assert second.requires == ["analyze"]
        assert "beats" in second.inputs
class TestParallelAnalysisStages:
    """Test parallel analysis stages."""

    def test_compile_parallel_stages(self):
        """Two analysis stages can run in parallel."""
        recipe = '''
(recipe "test-parallel"
(def audio-a (source :path "a.mp3"))
(def audio-b (source :path "b.mp3"))
(stage :analyze-a
:outputs [beats-a]
(def beats-a (-> audio-a (analyze beats))))
(stage :analyze-b
:outputs [beats-b]
(def beats-b (-> audio-b (analyze beats))))
(stage :combine
:requires [:analyze-a :analyze-b]
:inputs [beats-a beats-b]
(-> audio-a (segment :times beats-a) (sequence))))
'''
        compiled = compile_recipe(parse(recipe))
        assert len(compiled.stages) == 3
        # analyze-a and analyze-b should both be at level 0 (parallel)
        analyze_a = next(s for s in compiled.stages if s.name == "analyze-a")
        analyze_b = next(s for s in compiled.stages if s.name == "analyze-b")
        combine = next(s for s in compiled.stages if s.name == "combine")
        # Neither analysis stage depends on the other, so both have an
        # empty :requires list; only the combiner depends on both.
        assert analyze_a.requires == []
        assert analyze_b.requires == []
        assert set(combine.requires) == {"analyze-a", "analyze-b"}
class TestDiamondDependency:
    """Test diamond dependency pattern: A -> B, A -> C, B+C -> D."""

    def test_compile_diamond_pattern(self):
        """Diamond pattern compiles correctly."""
        recipe = '''
(recipe "test-diamond"
(def audio (source :path "test.mp3"))
(stage :source-stage
:outputs [audio-ref]
(def audio-ref audio))
(stage :branch-b
:requires [:source-stage]
:inputs [audio-ref]
:outputs [result-b]
(def result-b (-> audio-ref (effect gain :amount 0.5))))
(stage :branch-c
:requires [:source-stage]
:inputs [audio-ref]
:outputs [result-c]
(def result-c (-> audio-ref (effect gain :amount 0.8))))
(stage :merge
:requires [:branch-b :branch-c]
:inputs [result-b result-c]
(-> result-b (blend result-c :mode "mix"))))
'''
        compiled = compile_recipe(parse(recipe))
        assert len(compiled.stages) == 4
        # Check dependencies
        source = next(s for s in compiled.stages if s.name == "source-stage")
        branch_b = next(s for s in compiled.stages if s.name == "branch-b")
        branch_c = next(s for s in compiled.stages if s.name == "branch-c")
        merge = next(s for s in compiled.stages if s.name == "merge")
        # The diamond shape: root has no requires, both branches depend on
        # the root, and the merge depends on both branches.
        assert source.requires == []
        assert branch_b.requires == ["source-stage"]
        assert branch_c.requires == ["source-stage"]
        assert set(merge.requires) == {"branch-b", "branch-c"}
        # source-stage should come first in order
        assert compiled.stage_order.index("source-stage") < compiled.stage_order.index("branch-b")
        assert compiled.stage_order.index("source-stage") < compiled.stage_order.index("branch-c")
        # merge should come last
        assert compiled.stage_order.index("branch-b") < compiled.stage_order.index("merge")
        assert compiled.stage_order.index("branch-c") < compiled.stage_order.index("merge")
class TestStageReuseOnRerun:
    """Test that re-running recipe uses cached stages."""

    def test_stage_reuse(self):
        """Re-running recipe uses cached stages."""
        with tempfile.TemporaryDirectory() as cache_dir:
            cache = StageCache(cache_dir)
            # First run: persist an analysis stage under a fixed cache ID.
            cache.save_stage(StageCacheEntry(
                stage_name="analyze",
                cache_id="fixed_cache_id",
                outputs={"beats": StageOutput(cache_id="beats_out", output_type="analysis")},
            ))
            assert cache.has_stage("fixed_cache_id")
            # Second run: the same cache ID must resolve to the saved entry.
            restored = cache.load_stage("fixed_cache_id")
            assert restored is not None
            assert restored.stage_name == "analyze"
class TestExplicitDataFlowEndToEnd:
    """Test that analysis results flow through :inputs/:outputs."""

    def test_data_flow_declaration(self):
        """Explicit data flow is declared correctly."""
        recipe = '''
(recipe "test-data-flow"
(def audio (source :path "test.mp3"))
(stage :analyze
:outputs [beats tempo]
(def beats (-> audio (analyze beats)))
(def tempo (-> audio (analyze tempo))))
(stage :process
:requires [:analyze]
:inputs [beats tempo]
:outputs [result]
(def result (-> audio (segment :times beats) (effect speed :factor tempo)))
(-> result (sequence))))
'''
        compiled = compile_recipe(parse(recipe))
        analyze = next(s for s in compiled.stages if s.name == "analyze")
        process = next(s for s in compiled.stages if s.name == "process")
        # Analyze outputs: both declared names must be recorded and each
        # bound to a concrete node in output_bindings.
        assert set(analyze.outputs) == {"beats", "tempo"}
        assert "beats" in analyze.output_bindings
        assert "tempo" in analyze.output_bindings
        # Process inputs: the downstream stage consumes exactly the two
        # analysis outputs and depends on the analyze stage.
        assert set(process.inputs) == {"beats", "tempo"}
        assert process.requires == ["analyze"]
class TestRecipeFixtures:
    """Test using recipe fixtures.

    Demonstrates providing recipe source text via pytest fixtures rather
    than inline literals; each test compiles the fixture's recipe.
    """

    @pytest.fixture
    def test_recipe_two_stages(self):
        # Two-stage recipe: one analysis stage feeding one output stage.
        return '''
(recipe "test-two-stages"
(def audio (source :path "test.mp3"))
(stage :analyze
:outputs [beats]
(def beats (-> audio (analyze beats))))
(stage :output
:requires [:analyze]
:inputs [beats]
(-> audio (segment :times beats) (sequence))))
'''

    @pytest.fixture
    def test_recipe_parallel_stages(self):
        # Three-stage recipe: two independent analyses plus a combiner.
        return '''
(recipe "test-parallel"
(def audio-a (source :path "a.mp3"))
(def audio-b (source :path "b.mp3"))
(stage :analyze-a
:outputs [beats-a]
(def beats-a (-> audio-a (analyze beats))))
(stage :analyze-b
:outputs [beats-b]
(def beats-b (-> audio-b (analyze beats))))
(stage :combine
:requires [:analyze-a :analyze-b]
:inputs [beats-a beats-b]
(-> audio-a (blend audio-b :mode "mix"))))
'''

    def test_two_stages_fixture(self, test_recipe_two_stages):
        """Two-stage recipe fixture compiles."""
        compiled = compile_recipe(parse(test_recipe_two_stages))
        assert len(compiled.stages) == 2

    def test_parallel_stages_fixture(self, test_recipe_parallel_stages):
        """Parallel stages recipe fixture compiles."""
        compiled = compile_recipe(parse(test_recipe_parallel_stages))
        assert len(compiled.stages) == 3
class TestStageValidationErrors:
    """Test error handling for invalid stage recipes.

    Integration-level counterpart to the compiler's validation unit tests:
    each invalid recipe must fail with a specific CompileError message.
    """

    def test_missing_output_declaration(self):
        """Error when stage output not declared."""
        recipe = '''
(recipe "test-missing-output"
(def audio (source :path "test.mp3"))
(stage :analyze
:outputs [beats nonexistent]
(def beats (-> audio (analyze beats)))))
'''
        # "nonexistent" is listed in :outputs but never defined in the body.
        with pytest.raises(CompileError, match="not defined in the stage body"):
            compile_recipe(parse(recipe))

    def test_input_without_requires(self):
        """Error when using input not from required stage."""
        recipe = '''
(recipe "test-bad-input"
(def audio (source :path "test.mp3"))
(stage :analyze
:outputs [beats]
(def beats (-> audio (analyze beats))))
(stage :process
:requires []
:inputs [beats]
(def result audio)))
'''
        # :process declares no :requires, so "beats" cannot be satisfied
        # even though an earlier stage produces it.
        with pytest.raises(CompileError, match="not an output of any required stage"):
            compile_recipe(parse(recipe))

    def test_forward_reference(self):
        """Error when requiring stage not yet defined (forward reference)."""
        recipe = '''
(recipe "test-forward-ref"
(def audio (source :path "test.mp3"))
(stage :a
:requires [:b]
:outputs [out-a]
(def out-a audio)
audio)
(stage :b
:outputs [out-b]
(def out-b audio)
audio))
'''
        # :a references :b before :b is defined; forward refs are rejected.
        with pytest.raises(CompileError, match="requires undefined stage"):
            compile_recipe(parse(recipe))
class TestBeatSyncDemoRecipe:
    """Test the beat-sync demo recipe from examples.

    Compiles a fixed demo recipe (BEAT_SYNC_RECIPE) and checks its
    metadata, stage structure, and generated node graph.
    """

    # Shared recipe source used by every test in this class.
    BEAT_SYNC_RECIPE = '''
;; Simple staged recipe demo
(recipe "beat-sync-demo"
:version "1.0"
:description "Demo of staged beat-sync workflow"
;; Pre-stage definitions (available to all stages)
(def audio (source :path "input.mp3"))
;; Stage 1: Analysis (expensive, cached)
(stage :analyze
:outputs [beats tempo]
(def beats (-> audio (analyze beats)))
(def tempo (-> audio (analyze tempo))))
;; Stage 2: Processing (uses analysis results)
(stage :process
:requires [:analyze]
:inputs [beats]
:outputs [segments]
(def segments (-> audio (segment :times beats)))
(-> segments (sequence))))
'''

    def test_compile_beat_sync_recipe(self):
        """Beat-sync demo recipe compiles correctly."""
        compiled = compile_recipe(parse(self.BEAT_SYNC_RECIPE))
        # Recipe-level metadata is carried through compilation.
        assert compiled.name == "beat-sync-demo"
        assert compiled.version == "1.0"
        assert compiled.description == "Demo of staged beat-sync workflow"

    def test_beat_sync_stage_count(self):
        """Beat-sync has 2 stages in correct order."""
        compiled = compile_recipe(parse(self.BEAT_SYNC_RECIPE))
        assert len(compiled.stages) == 2
        assert compiled.stage_order == ["analyze", "process"]

    def test_beat_sync_analyze_stage(self):
        """Analyze stage has correct outputs."""
        compiled = compile_recipe(parse(self.BEAT_SYNC_RECIPE))
        analyze = next(s for s in compiled.stages if s.name == "analyze")
        assert analyze.requires == []
        assert analyze.inputs == []
        assert set(analyze.outputs) == {"beats", "tempo"}
        assert "beats" in analyze.output_bindings
        assert "tempo" in analyze.output_bindings

    def test_beat_sync_process_stage(self):
        """Process stage has correct dependencies and inputs."""
        compiled = compile_recipe(parse(self.BEAT_SYNC_RECIPE))
        process = next(s for s in compiled.stages if s.name == "process")
        assert process.requires == ["analyze"]
        assert "beats" in process.inputs
        assert "segments" in process.outputs

    def test_beat_sync_node_count(self):
        """Beat-sync generates expected number of nodes."""
        compiled = compile_recipe(parse(self.BEAT_SYNC_RECIPE))
        # 1 SOURCE + 2 ANALYZE + 1 SEGMENT + 1 SEQUENCE = 5 nodes
        assert len(compiled.nodes) == 5

    def test_beat_sync_node_types(self):
        """Beat-sync generates correct node types."""
        compiled = compile_recipe(parse(self.BEAT_SYNC_RECIPE))
        node_types = [n["type"] for n in compiled.nodes]
        assert node_types.count("SOURCE") == 1
        assert node_types.count("ANALYZE") == 2
        assert node_types.count("SEGMENT") == 1
        assert node_types.count("SEQUENCE") == 1

    def test_beat_sync_output_is_sequence(self):
        """Beat-sync output node is the sequence node."""
        compiled = compile_recipe(parse(self.BEAT_SYNC_RECIPE))
        # The final (-> segments (sequence)) expression becomes the output node.
        output_node = next(n for n in compiled.nodes if n["id"] == compiled.output_node_id)
        assert output_node["type"] == "SEQUENCE"
class TestAsciiArtStagedRecipe:
    """Test the ASCII art staged recipe.

    A three-stage recipe (analyze -> process -> output) exercising effect
    registration, analyzer registration, parameter binding via (bind ...),
    and a MUX output node.
    """

    # Shared recipe source used by every test in this class.
    ASCII_ART_STAGED_RECIPE = '''
;; ASCII art effect with staged execution
(recipe "ascii_art_staged"
:version "1.0"
:description "ASCII art effect with staged execution"
:encoding (:codec "libx264" :crf 20 :preset "medium" :audio-codec "aac" :fps 30)
;; Registry
(effect ascii_art :path "sexp_effects/effects/ascii_art.sexp")
(analyzer energy :path "../artdag-analyzers/energy/analyzer.py")
;; Pre-stage definitions
(def color_mode "color")
(def video (source :path "monday.webm"))
(def audio (source :path "dizzy.mp3"))
;; Stage 1: Analysis
(stage :analyze
:outputs [energy-data]
(def audio-clip (-> audio (segment :start 60 :duration 10)))
(def energy-data (-> audio-clip (analyze energy))))
;; Stage 2: Process
(stage :process
:requires [:analyze]
:inputs [energy-data]
:outputs [result audio-clip]
(def clip (-> video (segment :start 0 :duration 10)))
(def audio-clip (-> audio (segment :start 60 :duration 10)))
(def result (-> clip
(effect ascii_art
:char_size (bind energy-data values :range [2 32])
:color_mode color_mode))))
;; Stage 3: Output
(stage :output
:requires [:process]
:inputs [result audio-clip]
(mux result audio-clip)))
'''

    def test_compile_ascii_art_staged(self):
        """ASCII art staged recipe compiles correctly."""
        compiled = compile_recipe(parse(self.ASCII_ART_STAGED_RECIPE))
        assert compiled.name == "ascii_art_staged"
        assert compiled.version == "1.0"

    def test_ascii_art_stage_count(self):
        """ASCII art has 3 stages in correct order."""
        compiled = compile_recipe(parse(self.ASCII_ART_STAGED_RECIPE))
        assert len(compiled.stages) == 3
        assert compiled.stage_order == ["analyze", "process", "output"]

    def test_ascii_art_analyze_stage(self):
        """Analyze stage outputs energy-data."""
        compiled = compile_recipe(parse(self.ASCII_ART_STAGED_RECIPE))
        analyze = next(s for s in compiled.stages if s.name == "analyze")
        assert analyze.requires == []
        assert analyze.inputs == []
        assert "energy-data" in analyze.outputs

    def test_ascii_art_process_stage(self):
        """Process stage requires analyze and outputs result."""
        compiled = compile_recipe(parse(self.ASCII_ART_STAGED_RECIPE))
        process = next(s for s in compiled.stages if s.name == "process")
        assert process.requires == ["analyze"]
        assert "energy-data" in process.inputs
        # The stage re-exports both the effect result and the audio clip.
        assert "result" in process.outputs
        assert "audio-clip" in process.outputs

    def test_ascii_art_output_stage(self):
        """Output stage requires process and has mux."""
        compiled = compile_recipe(parse(self.ASCII_ART_STAGED_RECIPE))
        output = next(s for s in compiled.stages if s.name == "output")
        assert output.requires == ["process"]
        assert "result" in output.inputs
        assert "audio-clip" in output.inputs

    def test_ascii_art_node_count(self):
        """ASCII art generates expected nodes."""
        compiled = compile_recipe(parse(self.ASCII_ART_STAGED_RECIPE))
        # 2 SOURCE + 2 SEGMENT + 1 ANALYZE + 1 EFFECT + 1 MUX = 7+ nodes
        assert len(compiled.nodes) >= 7

    def test_ascii_art_has_mux_output(self):
        """ASCII art output is MUX node."""
        compiled = compile_recipe(parse(self.ASCII_ART_STAGED_RECIPE))
        output_node = next(n for n in compiled.nodes if n["id"] == compiled.output_node_id)
        assert output_node["type"] == "MUX"
class TestMixedStagedAndNonStagedRecipes:
    """Test that non-staged recipes still work."""

    def test_recipe_without_stages(self):
        """Non-staged recipe compiles normally."""
        recipe = '''
(recipe "no-stages"
(-> (source :path "test.mp3")
(effect gain :amount 0.5)))
'''
        result = compile_recipe(parse(recipe))
        # No (stage ...) forms -> stage metadata is empty but the node
        # graph is still produced.
        assert result.stages == []
        assert result.stage_order == []
        assert len(result.nodes) > 0

    def test_mixed_pre_stage_and_stages(self):
        """Pre-stage definitions work with stages."""
        recipe = '''
(recipe "mixed"
;; Pre-stage definitions
(def audio (source :path "test.mp3"))
(def volume 0.8)
;; Stage using pre-stage definitions, ending with output expression
(stage :process
:outputs [result]
(def result (-> audio (effect gain :amount volume)))
result))
'''
        result = compile_recipe(parse(recipe))
        assert len(result.stages) == 1
        # audio and volume defined before the stage are visible inside it.
        only_stage = result.stages[0]
        assert only_stage.name == "process"
        assert "result" in only_stage.outputs
class TestEffectParamsBlock:
    """Test :params block parsing in effect definitions.

    Covers the new ``:params`` syntax, ``:choices`` handling, rejection of
    the legacy parameter syntax, and file-based introspection.

    Note: ``pytest``, ``tempfile`` and ``Path`` are imported at module
    level, so the redundant per-test re-imports were removed; the
    project-local ``effect_loader`` imports stay local by convention.
    """

    def test_parse_effect_with_params_block(self):
        """Parse effect with new :params syntax."""
        from .effect_loader import load_sexp_effect
        effect_code = '''
(define-effect test_effect
:params (
(size :type int :default 10 :range [1 100] :desc "Size parameter")
(color :type string :default "red" :desc "Color parameter")
(enabled :type int :default 1 :range [0 1] :desc "Enable flag")
)
frame)
'''
        name, process_fn, defaults, param_defs = load_sexp_effect(effect_code)
        assert name == "test_effect"
        assert len(param_defs) == 3
        # Defaults are extracted from each :default clause.
        assert defaults["size"] == 10
        assert defaults["color"] == "red"
        assert defaults["enabled"] == 1
        # Check ParamDef objects
        size_param = param_defs[0]
        assert size_param.name == "size"
        assert size_param.param_type == "int"
        assert size_param.default == 10
        assert size_param.range_min == 1.0
        assert size_param.range_max == 100.0
        assert size_param.description == "Size parameter"
        color_param = param_defs[1]
        assert color_param.name == "color"
        assert color_param.param_type == "string"
        assert color_param.default == "red"

    def test_parse_effect_with_choices(self):
        """Parse effect with choices in :params."""
        from .effect_loader import load_sexp_effect
        effect_code = '''
(define-effect mode_effect
:params (
(mode :type string :default "fast"
:choices [fast slow medium]
:desc "Processing mode")
)
frame)
'''
        name, _, defaults, param_defs = load_sexp_effect(effect_code)
        assert name == "mode_effect"
        assert defaults["mode"] == "fast"
        # Choice symbols are converted to plain strings.
        mode_param = param_defs[0]
        assert mode_param.choices == ["fast", "slow", "medium"]

    def test_legacy_effect_syntax_rejected(self):
        """Legacy effect syntax should be rejected."""
        from .effect_loader import load_sexp_effect
        effect_code = '''
(define-effect legacy_effect
((width 100)
(height 200)
(name "default"))
frame)
'''
        # Old positional ((name default) ...) parameter lists must raise
        # with a message pointing at the :params replacement.
        with pytest.raises(ValueError) as exc_info:
            load_sexp_effect(effect_code)
        assert "Legacy parameter syntax" in str(exc_info.value)
        assert ":params" in str(exc_info.value)

    def test_effect_params_introspection(self):
        """Test that effect params are available for introspection."""
        from .effect_loader import load_sexp_effect_file
        # Create a temp effect file; delete=False so it can be re-opened
        # by the loader on all platforms, then unlink it ourselves.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.sexp', delete=False) as f:
            f.write('''
(define-effect introspect_test
:params (
(alpha :type float :default 0.5 :range [0 1] :desc "Alpha value")
)
frame)
''')
            temp_path = Path(f.name)
        try:
            name, _, defaults, param_defs = load_sexp_effect_file(temp_path)
            assert name == "introspect_test"
            assert len(param_defs) == 1
            assert param_defs[0].name == "alpha"
            assert param_defs[0].param_type == "float"
        finally:
            temp_path.unlink()
class TestConstructParamsBlock:
    """Test :params block parsing in construct definitions."""

    def test_parse_construct_params_helper(self):
        """Test the _parse_construct_params helper function."""
        from .planner import _parse_construct_params
        from .parser import Symbol, Keyword
        raw_params = [
            [Symbol("duration"), Keyword("type"), Symbol("float"),
             Keyword("default"), 5.0, Keyword("desc"), "Duration in seconds"],
            [Symbol("count"), Keyword("type"), Symbol("int"),
             Keyword("default"), 10],
        ]
        names, defaults = _parse_construct_params(raw_params)
        # Names preserve declaration order; defaults come from :default.
        assert names == ["duration", "count"]
        assert defaults["duration"] == 5.0
        assert defaults["count"] == 10

    def test_construct_params_with_no_defaults(self):
        """Test construct params where some have no default."""
        from .planner import _parse_construct_params
        from .parser import Symbol, Keyword
        raw_params = [
            [Symbol("required_param"), Keyword("type"), Symbol("string")],
            [Symbol("optional_param"), Keyword("type"), Symbol("int"),
             Keyword("default"), 42],
        ]
        names, defaults = _parse_construct_params(raw_params)
        assert names == ["required_param", "optional_param"]
        # A parameter without :default maps to None.
        assert defaults["required_param"] is None
        assert defaults["optional_param"] == 42
class TestParameterValidation:
    """Test that unknown parameters are rejected.

    Note: the redundant per-test ``import pytest`` statements were removed
    (pytest is already imported at module level). ``numpy`` stays a local
    import because it is not imported at the top of this module.
    """

    def test_effect_rejects_unknown_params(self):
        """Effects should reject unknown parameters."""
        from .effect_loader import load_sexp_effect
        import numpy as np
        effect_code = '''
(define-effect test_effect
:params (
(brightness :type int :default 0 :desc "Brightness")
)
frame)
'''
        name, process_frame, defaults, _ = load_sexp_effect(effect_code)
        # Create a test frame
        frame = np.zeros((100, 100, 3), dtype=np.uint8)
        state = {}
        # Valid param should work
        result, _ = process_frame(frame, {"brightness": 10}, state)
        assert isinstance(result, np.ndarray)
        # Unknown param should raise, and the message should list the
        # valid parameter names to help the recipe author.
        with pytest.raises(ValueError) as exc_info:
            process_frame(frame, {"unknown_param": 42}, state)
        assert "Unknown parameter 'unknown_param'" in str(exc_info.value)
        assert "brightness" in str(exc_info.value)

    def test_effect_no_params_rejects_all(self):
        """Effects with no params should reject any parameter."""
        from .effect_loader import load_sexp_effect
        import numpy as np
        effect_code = '''
(define-effect no_params_effect
:params ()
frame)
'''
        name, process_frame, defaults, _ = load_sexp_effect(effect_code)
        # Create a test frame
        frame = np.zeros((100, 100, 3), dtype=np.uint8)
        state = {}
        # Empty params should work
        result, _ = process_frame(frame, {}, state)
        assert isinstance(result, np.ndarray)
        # Any param should raise; with no declared params the error lists
        # "(none)" as the set of valid names.
        with pytest.raises(ValueError) as exc_info:
            process_frame(frame, {"any_param": 42}, state)
        assert "Unknown parameter 'any_param'" in str(exc_info.value)
        assert "(none)" in str(exc_info.value)

View File

@@ -0,0 +1,228 @@
"""
Tests for stage-aware planning.
Tests stage topological sorting, level computation, cache ID computation,
and plan metadata generation.
"""
import pytest
from pathlib import Path
from .parser import parse
from .compiler import compile_recipe, CompiledStage
from .planner import (
create_plan,
StagePlan,
_compute_stage_levels,
_compute_stage_cache_id,
)
class TestStagePlanning:
    """Test stage-aware plan creation.

    Covers stage ordering, level assignment (stages at the same level can
    run in parallel), and the stage cache-ID function's determinism and
    sensitivity to upstream cache IDs and the cluster key.
    """

    def test_stage_topological_sort_in_plan(self):
        """Stages sorted by dependencies in plan."""
        recipe = '''
(recipe "test-sort"
(def audio (source :path "test.mp3"))
(stage :analyze
:outputs [beats]
(def beats (-> audio (analyze beats))))
(stage :output
:requires [:analyze]
:inputs [beats]
(-> audio (segment :times beats) (sequence))))
'''
        compiled = compile_recipe(parse(recipe))
        # Note: create_plan needs recipe_dir for analysis, we'll test the ordering differently
        assert compiled.stage_order.index("analyze") < compiled.stage_order.index("output")

    def test_stage_level_computation(self):
        """Independent stages get same level."""
        stages = [
            CompiledStage(name="a", requires=[], inputs=[], outputs=["x"],
                          node_ids=["n1"], output_bindings={"x": "n1"}),
            CompiledStage(name="b", requires=[], inputs=[], outputs=["y"],
                          node_ids=["n2"], output_bindings={"y": "n2"}),
            CompiledStage(name="c", requires=["a", "b"], inputs=["x", "y"], outputs=["z"],
                          node_ids=["n3"], output_bindings={"z": "n3"}),
        ]
        levels = _compute_stage_levels(stages)
        # a and b have no requires, so both sit at level 0 (parallelizable).
        assert levels["a"] == 0
        assert levels["b"] == 0
        assert levels["c"] == 1  # Depends on a and b

    def test_stage_level_chain(self):
        """Chain stages get increasing levels."""
        stages = [
            CompiledStage(name="a", requires=[], inputs=[], outputs=["x"],
                          node_ids=["n1"], output_bindings={"x": "n1"}),
            CompiledStage(name="b", requires=["a"], inputs=["x"], outputs=["y"],
                          node_ids=["n2"], output_bindings={"y": "n2"}),
            CompiledStage(name="c", requires=["b"], inputs=["y"], outputs=["z"],
                          node_ids=["n3"], output_bindings={"z": "n3"}),
        ]
        levels = _compute_stage_levels(stages)
        # A linear chain yields strictly increasing levels.
        assert levels["a"] == 0
        assert levels["b"] == 1
        assert levels["c"] == 2

    def test_stage_cache_id_deterministic(self):
        """Same stage = same cache ID."""
        stage = CompiledStage(
            name="analyze",
            requires=[],
            inputs=[],
            outputs=["beats"],
            node_ids=["abc123"],
            output_bindings={"beats": "abc123"},
        )
        # Two calls with identical inputs must hash identically.
        cache_id_1 = _compute_stage_cache_id(
            stage,
            stage_cache_ids={},
            node_cache_ids={"abc123": "nodeabc"},
            cluster_key=None,
        )
        cache_id_2 = _compute_stage_cache_id(
            stage,
            stage_cache_ids={},
            node_cache_ids={"abc123": "nodeabc"},
            cluster_key=None,
        )
        assert cache_id_1 == cache_id_2

    def test_stage_cache_id_includes_requires(self):
        """Cache ID changes when required stage cache ID changes."""
        stage = CompiledStage(
            name="process",
            requires=["analyze"],
            inputs=["beats"],
            outputs=["result"],
            node_ids=["def456"],
            output_bindings={"result": "def456"},
        )
        # Same stage, but the upstream :analyze stage hashes differently.
        cache_id_1 = _compute_stage_cache_id(
            stage,
            stage_cache_ids={"analyze": "req_cache_a"},
            node_cache_ids={"def456": "node_def"},
            cluster_key=None,
        )
        cache_id_2 = _compute_stage_cache_id(
            stage,
            stage_cache_ids={"analyze": "req_cache_b"},
            node_cache_ids={"def456": "node_def"},
            cluster_key=None,
        )
        # Different required stage cache IDs should produce different cache IDs
        assert cache_id_1 != cache_id_2

    def test_stage_cache_id_cluster_key(self):
        """Cache ID changes with cluster key."""
        stage = CompiledStage(
            name="analyze",
            requires=[],
            inputs=[],
            outputs=["beats"],
            node_ids=["abc123"],
            output_bindings={"beats": "abc123"},
        )
        cache_id_no_key = _compute_stage_cache_id(
            stage,
            stage_cache_ids={},
            node_cache_ids={"abc123": "nodeabc"},
            cluster_key=None,
        )
        cache_id_with_key = _compute_stage_cache_id(
            stage,
            stage_cache_ids={},
            node_cache_ids={"abc123": "nodeabc"},
            cluster_key="cluster123",
        )
        # Cluster key should change the cache ID
        assert cache_id_no_key != cache_id_with_key
class TestStagePlanMetadata:
    """Test stage metadata in execution plans."""

    def test_plan_without_stages(self):
        """Plan without stages has empty stage fields."""
        recipe = '''
(recipe "no-stages"
(-> (source :path "test.mp3") (effect gain :amount 0.5)))
'''
        result = compile_recipe(parse(recipe))
        # A stage-free recipe compiles with empty stage metadata.
        assert result.stages == []
        assert result.stage_order == []
class TestStagePlanDataclass:
    """Test StagePlan dataclass."""

    def test_stage_plan_creation(self):
        """StagePlan can be created with all fields."""
        from .planner import PlanStep
        # A single ANALYZE step tagged with its owning stage and the
        # stage-level cache ID.
        step = PlanStep(
            step_id="step1",
            node_type="ANALYZE",
            config={"analyzer": "beats"},
            inputs=["input1"],
            cache_id="cache123",
            level=0,
            stage="analyze",
            stage_cache_id="stage_cache_123",
        )
        stage_plan = StagePlan(
            stage_name="analyze",
            cache_id="stage_cache_123",
            steps=[step],
            requires=[],
            output_bindings={"beats": "cache123"},
            level=0,
        )
        # Constructor stores every field as given.
        assert stage_plan.stage_name == "analyze"
        assert stage_plan.cache_id == "stage_cache_123"
        assert len(stage_plan.steps) == 1
        assert stage_plan.level == 0
class TestExplicitDataRouting:
    """Test that plan includes explicit data routing."""

    def test_plan_step_includes_stage_info(self):
        """PlanStep includes stage and stage_cache_id."""
        from .planner import PlanStep
        from .parser import serialize
        step = PlanStep(
            step_id="step1",
            node_type="ANALYZE",
            config={},
            inputs=[],
            cache_id="cache123",
            level=0,
            stage="analyze",
            stage_cache_id="stage_cache_abc",
        )
        # Render the step's s-expression form and check the stage
        # metadata survives serialization.
        rendered = serialize(step.to_sexp())
        assert "stage" in rendered
        assert "analyze" in rendered
        assert "stage-cache-id" in rendered

View File

@@ -0,0 +1,323 @@
"""
Tests for stage-aware scheduler.
Tests stage cache hit/miss, stage execution ordering,
and parallel stage support.
"""
import pytest
import tempfile
from unittest.mock import Mock, MagicMock, patch
from .scheduler import (
StagePlanScheduler,
StageResult,
StagePlanResult,
create_stage_scheduler,
schedule_staged_plan,
)
from .planner import ExecutionPlanSexp, PlanStep, StagePlan
from .stage_cache import StageCache, StageCacheEntry, StageOutput
class TestStagePlanScheduler:
"""Test stage-aware scheduling."""
    def test_plan_without_stages_uses_regular_scheduling(self):
        """Plans without stages fall back to regular scheduling."""
        # An empty plan with no stage_plans: the stage-aware scheduler
        # should delegate to the plain scheduler path.
        plan = ExecutionPlanSexp(
            plan_id="test_plan",
            recipe_id="test_recipe",
            recipe_hash="abc123",
            steps=[],
            output_step_id="output",
            stage_plans=[],  # No stages
        )
        scheduler = StagePlanScheduler()
        # This will use PlanScheduler internally
        # Without Celery, it just returns completed status
        result = scheduler.schedule(plan)
        assert isinstance(result, StagePlanResult)
    def test_stage_cache_hit_skips_execution(self):
        """Cached stage not re-executed."""
        with tempfile.TemporaryDirectory() as tmpdir:
            stage_cache = StageCache(tmpdir)
            # Pre-populate cache with an entry whose cache_id matches the
            # stage plan below, simulating a previous successful run.
            entry = StageCacheEntry(
                stage_name="analyze",
                cache_id="stage_cache_123",
                outputs={"beats": StageOutput(cache_id="beats_out", output_type="analysis")},
            )
            stage_cache.save_stage(entry)
            step = PlanStep(
                step_id="step1",
                node_type="ANALYZE",
                config={},
                inputs=[],
                cache_id="step_cache",
                level=0,
                stage="analyze",
                stage_cache_id="stage_cache_123",
            )
            stage_plan = StagePlan(
                stage_name="analyze",
                cache_id="stage_cache_123",
                steps=[step],
                requires=[],
                output_bindings={"beats": "beats_out"},
                level=0,
            )
            plan = ExecutionPlanSexp(
                plan_id="test_plan",
                recipe_id="test_recipe",
                recipe_hash="abc123",
                steps=[step],
                output_step_id="step1",
                stage_plans=[stage_plan],
                stage_order=["analyze"],
                stage_levels={"analyze": 0},
                stage_cache_ids={"analyze": "stage_cache_123"},
            )
            scheduler = StagePlanScheduler(stage_cache=stage_cache)
            result = scheduler.schedule(plan)
            # The sole stage hit the cache, so nothing was executed.
            assert result.stages_cached == 1
            assert result.stages_completed == 0
def test_stage_inputs_loaded_from_cache(self):
"""Stage receives inputs from required stage cache."""
with tempfile.TemporaryDirectory() as tmpdir:
stage_cache = StageCache(tmpdir)
# Pre-populate upstream stage cache
upstream_entry = StageCacheEntry(
stage_name="analyze",
cache_id="upstream_cache",
outputs={"beats": StageOutput(cache_id="beats_data", output_type="analysis")},
)
stage_cache.save_stage(upstream_entry)
# Steps for stages
upstream_step = PlanStep(
step_id="analyze_step",
node_type="ANALYZE",
config={},
inputs=[],
cache_id="analyze_cache",
level=0,
stage="analyze",
stage_cache_id="upstream_cache",
)
downstream_step = PlanStep(
step_id="process_step",
node_type="SEGMENT",
config={},
inputs=["analyze_step"],
cache_id="process_cache",
level=1,
stage="process",
stage_cache_id="downstream_cache",
)
upstream_plan = StagePlan(
stage_name="analyze",
cache_id="upstream_cache",
steps=[upstream_step],
requires=[],
output_bindings={"beats": "beats_data"},
level=0,
)
downstream_plan = StagePlan(
stage_name="process",
cache_id="downstream_cache",
steps=[downstream_step],
requires=["analyze"],
output_bindings={"result": "process_cache"},
level=1,
)
plan = ExecutionPlanSexp(
plan_id="test_plan",
recipe_id="test_recipe",
recipe_hash="abc123",
steps=[upstream_step, downstream_step],
output_step_id="process_step",
stage_plans=[upstream_plan, downstream_plan],
stage_order=["analyze", "process"],
stage_levels={"analyze": 0, "process": 1},
stage_cache_ids={"analyze": "upstream_cache", "process": "downstream_cache"},
)
scheduler = StagePlanScheduler(stage_cache=stage_cache)
result = scheduler.schedule(plan)
# Upstream should be cached, downstream executed
assert result.stages_cached == 1
assert "analyze" in result.stage_results
assert result.stage_results["analyze"].status == "cached"
def test_parallel_stages_same_level(self):
"""Stages at same level can run in parallel."""
step_a = PlanStep(
step_id="step_a",
node_type="ANALYZE",
config={},
inputs=[],
cache_id="cache_a",
level=0,
stage="analyze-a",
stage_cache_id="stage_a",
)
step_b = PlanStep(
step_id="step_b",
node_type="ANALYZE",
config={},
inputs=[],
cache_id="cache_b",
level=0,
stage="analyze-b",
stage_cache_id="stage_b",
)
stage_a = StagePlan(
stage_name="analyze-a",
cache_id="stage_a",
steps=[step_a],
requires=[],
output_bindings={"beats-a": "cache_a"},
level=0,
)
stage_b = StagePlan(
stage_name="analyze-b",
cache_id="stage_b",
steps=[step_b],
requires=[],
output_bindings={"beats-b": "cache_b"},
level=0,
)
plan = ExecutionPlanSexp(
plan_id="test_plan",
recipe_id="test_recipe",
recipe_hash="abc123",
steps=[step_a, step_b],
output_step_id="step_b",
stage_plans=[stage_a, stage_b],
stage_order=["analyze-a", "analyze-b"],
stage_levels={"analyze-a": 0, "analyze-b": 0},
stage_cache_ids={"analyze-a": "stage_a", "analyze-b": "stage_b"},
)
scheduler = StagePlanScheduler()
# Group stages by level
stages_by_level = scheduler._group_stages_by_level(plan.stage_plans)
# Both stages should be at level 0
assert len(stages_by_level[0]) == 2
def test_stage_outputs_cached_after_execution(self):
"""Stage outputs written to cache after completion."""
with tempfile.TemporaryDirectory() as tmpdir:
stage_cache = StageCache(tmpdir)
step = PlanStep(
step_id="step1",
node_type="ANALYZE",
config={},
inputs=[],
cache_id="step_cache",
level=0,
stage="analyze",
stage_cache_id="new_stage_cache",
)
stage_plan = StagePlan(
stage_name="analyze",
cache_id="new_stage_cache",
steps=[step],
requires=[],
output_bindings={"beats": "step_cache"},
level=0,
)
plan = ExecutionPlanSexp(
plan_id="test_plan",
recipe_id="test_recipe",
recipe_hash="abc123",
steps=[step],
output_step_id="step1",
stage_plans=[stage_plan],
stage_order=["analyze"],
stage_levels={"analyze": 0},
stage_cache_ids={"analyze": "new_stage_cache"},
)
scheduler = StagePlanScheduler(stage_cache=stage_cache)
result = scheduler.schedule(plan)
# Stage should now be cached
assert stage_cache.has_stage("new_stage_cache")
class TestStageResult:
    """Tests for the StageResult dataclass."""

    def test_stage_result_creation(self):
        """All fields are accepted at construction and stored as given."""
        stage_result = StageResult(
            stage_name="test",
            cache_id="cache123",
            status="completed",
            step_results={},
            outputs={"out": "out_cache"},
        )
        # Spot-check a representative subset of the stored fields.
        assert stage_result.stage_name == "test"
        assert stage_result.status == "completed"
        assert stage_result.outputs["out"] == "out_cache"
class TestStagePlanResult:
    """Tests for the StagePlanResult dataclass."""

    def test_stage_plan_result_creation(self):
        """All fields are accepted at construction and stored as given."""
        plan_result = StagePlanResult(
            plan_id="plan123",
            status="completed",
            stages_completed=2,
            stages_cached=1,
            stages_failed=0,
        )
        # Spot-check a representative subset of the stored fields.
        assert plan_result.plan_id == "plan123"
        assert plan_result.stages_completed == 2
        assert plan_result.stages_cached == 1
class TestSchedulerFactory:
    """Tests for the scheduler factory helpers."""

    def test_create_stage_scheduler(self):
        """The factory returns a StagePlanScheduler instance by default."""
        built = create_stage_scheduler()
        assert isinstance(built, StagePlanScheduler)

    def test_create_stage_scheduler_with_cache(self):
        """A stage_cache passed to the factory is attached to the scheduler."""
        with tempfile.TemporaryDirectory() as tmp:
            cache = StageCache(tmp)
            built = create_stage_scheduler(stage_cache=cache)
            # Identity check: the scheduler must hold the exact cache we supplied.
            assert built.stage_cache is cache

View File

@@ -0,0 +1,384 @@
# Art DAG 3-Phase Execution Model
## Overview
The execution model separates DAG processing into three distinct phases:
```
Recipe + Inputs → ANALYZE → Analysis Results
Analysis + Recipe → PLAN → Execution Plan (with cache IDs)
Execution Plan → EXECUTE → Cached Results
```
This separation enables:
1. **Incremental development** - Re-run recipes without reprocessing unchanged steps
2. **Parallel execution** - Independent steps run concurrently via Celery
3. **Deterministic caching** - Same inputs always produce same cache IDs
4. **Cost estimation** - Plan phase can estimate work before executing
## Phase 1: Analysis
### Purpose
Extract features from input media that inform downstream processing decisions.
### Inputs
- Recipe YAML with input references
- Input media files (by content hash)
### Outputs
Analysis results stored as JSON, keyed by input hash:
```python
@dataclass
class AnalysisResult:
input_hash: str
features: Dict[str, Any]
# Audio features
beats: Optional[List[float]] # Beat times in seconds
downbeats: Optional[List[float]] # Bar-start times
tempo: Optional[float] # BPM
energy: Optional[List[Tuple[float, float]]] # (time, value) envelope
spectrum: Optional[Dict[str, List[Tuple[float, float]]]] # band envelopes
# Video features
duration: float
frame_rate: float
dimensions: Tuple[int, int]
motion_tempo: Optional[float] # Estimated BPM from motion
```
### Implementation
```python
class Analyzer:
def analyze(self, input_hash: str, features: List[str]) -> AnalysisResult:
"""Extract requested features from input."""
def analyze_audio(self, path: Path) -> AudioFeatures:
"""Extract all audio features using librosa/essentia."""
def analyze_video(self, path: Path) -> VideoFeatures:
"""Extract video metadata and motion analysis."""
```
### Caching
Analysis results are cached by:
```
analysis_cache_id = SHA3-256(input_hash + sorted(feature_names))
```
## Phase 2: Planning
### Purpose
Convert recipe + analysis into a complete execution plan with pre-computed cache IDs.
### Inputs
- Recipe YAML (parsed)
- Analysis results for all inputs
- Recipe parameters (user-supplied values)
### Outputs
An ExecutionPlan containing ordered steps, each with a pre-computed cache ID:
```python
@dataclass
class ExecutionStep:
step_id: str # Unique identifier
node_type: str # Primitive type (SOURCE, SEQUENCE, etc.)
config: Dict[str, Any] # Node configuration
input_steps: List[str] # IDs of steps this depends on
cache_id: str # Pre-computed: hash(inputs + config)
estimated_duration: float # Optional: for progress reporting
@dataclass
class ExecutionPlan:
plan_id: str # Hash of entire plan
recipe_id: str # Source recipe
steps: List[ExecutionStep] # Topologically sorted
analysis: Dict[str, AnalysisResult]
output_step: str # Final step ID
def compute_cache_ids(self):
"""Compute all cache IDs in dependency order."""
```
### Cache ID Computation
Cache IDs are computed in topological order so each step's cache ID
incorporates its inputs' cache IDs:
```python
def compute_cache_id(step: ExecutionStep, resolved_inputs: Dict[str, str]) -> str:
"""
Cache ID = SHA3-256(
node_type +
canonical_json(config) +
sorted([input_cache_ids])
)
"""
components = [
step.node_type,
json.dumps(step.config, sort_keys=True),
*sorted(resolved_inputs[s] for s in step.input_steps)
]
return sha3_256('|'.join(components))
```
### Plan Generation
The planner expands recipe nodes into concrete steps:
1. **SOURCE nodes** → Direct step with input hash as cache ID
2. **ANALYZE nodes** → Step that references analysis results
3. **TRANSFORM nodes** → Step with static config
4. **TRANSFORM_DYNAMIC nodes** → Expanded to per-frame steps (or use BIND output)
5. **SEQUENCE nodes** → Tree reduction for parallel composition
6. **MAP nodes** → Expanded to N parallel steps + reduction
### Tree Reduction for Composition
Instead of sequential pairwise composition:
```
A → B → C → D (3 sequential steps)
```
Use parallel tree reduction:
```
A ─┬─ AB ─┬─ ABCD
B ─┘ │
C ─┬─ CD ─┘
D ─┘
Level 0: [A, B, C, D] (4 parallel)
Level 1: [AB, CD] (2 parallel)
Level 2: [ABCD] (1 final)
```
This reduces the number of sequential composition levels from O(N) to O(log N).
## Phase 3: Execution
### Purpose
Execute the plan, skipping steps with cached results.
### Inputs
- ExecutionPlan with pre-computed cache IDs
- Cache state (which IDs already exist)
### Process
1. **Claim Check**: For each step, atomically check if result is cached
2. **Task Dispatch**: Uncached steps dispatched to Celery workers
3. **Parallel Execution**: Independent steps run concurrently
4. **Result Storage**: Each step stores result with its cache ID
5. **Progress Tracking**: Real-time status updates
### Hash-Based Task Claiming
Prevents duplicate work when multiple workers process the same plan:
```lua
-- Redis Lua script for atomic claim
local key = KEYS[1]
local data = redis.call('GET', key)
if data then
local status = cjson.decode(data)
if status.status == 'running' or
status.status == 'completed' or
status.status == 'cached' then
return 0 -- Already claimed/done
end
end
local claim_data = ARGV[1]
local ttl = tonumber(ARGV[2])
redis.call('SETEX', key, ttl, claim_data)
return 1 -- Successfully claimed
```
### Celery Task Structure
```python
@app.task(bind=True)
def execute_step(self, step_json: str, plan_id: str) -> dict:
"""Execute a single step with caching."""
step = ExecutionStep.from_json(step_json)
# Check cache first
if cache.has(step.cache_id):
return {'status': 'cached', 'cache_id': step.cache_id}
# Try to claim this work
if not claim_task(step.cache_id, self.request.id):
# Another worker is handling it, wait for result
return wait_for_result(step.cache_id)
# Do the work
executor = get_executor(step.node_type)
input_paths = [cache.get(s) for s in step.input_steps]
output_path = cache.get_output_path(step.cache_id)
result_path = executor.execute(step.config, input_paths, output_path)
cache.put(step.cache_id, result_path)
return {'status': 'completed', 'cache_id': step.cache_id}
```
### Execution Orchestration
```python
class PlanExecutor:
def execute(self, plan: ExecutionPlan) -> ExecutionResult:
"""Execute plan with parallel Celery tasks."""
# Group steps by level (steps at same level can run in parallel)
levels = self.compute_dependency_levels(plan.steps)
for level_steps in levels:
# Dispatch all steps at this level
tasks = [
execute_step.delay(step.to_json(), plan.plan_id)
for step in level_steps
if not self.cache.has(step.cache_id)
]
# Wait for level completion
results = [task.get() for task in tasks]
return self.collect_results(plan)
```
## Data Flow Example
### Recipe: beat-cuts
```yaml
nodes:
- id: music
type: SOURCE
config: { input: true }
- id: beats
type: ANALYZE
config: { feature: beats }
inputs: [music]
- id: videos
type: SOURCE_LIST
config: { input: true }
- id: slices
type: MAP
config: { operation: RANDOM_SLICE }
inputs:
items: videos
timing: beats
- id: final
type: SEQUENCE
inputs: [slices]
```
### Phase 1: Analysis
```python
# Input: music file with hash abc123
analysis = {
'abc123': AnalysisResult(
beats=[0.0, 0.48, 0.96, 1.44, ...],
tempo=125.0,
duration=180.0
)
}
```
### Phase 2: Planning
```python
# Expands MAP into concrete steps
plan = ExecutionPlan(
steps=[
# Source steps
ExecutionStep(id='music', cache_id='abc123', ...),
ExecutionStep(id='video_0', cache_id='def456', ...),
ExecutionStep(id='video_1', cache_id='ghi789', ...),
# Slice steps (one per beat group)
ExecutionStep(id='slice_0', cache_id='hash(video_0+timing)', ...),
ExecutionStep(id='slice_1', cache_id='hash(video_1+timing)', ...),
...
# Tree reduction for sequence
ExecutionStep(id='seq_0_1', inputs=['slice_0', 'slice_1'], ...),
ExecutionStep(id='seq_2_3', inputs=['slice_2', 'slice_3'], ...),
ExecutionStep(id='seq_final', inputs=['seq_0_1', 'seq_2_3'], ...),
]
)
```
### Phase 3: Execution
```
Level 0: [music, video_0, video_1] → all cached (SOURCE)
Level 1: [slice_0, slice_1, slice_2, slice_3] → 4 parallel tasks
Level 2: [seq_0_1, seq_2_3] → 2 parallel SEQUENCE tasks
Level 3: [seq_final] → 1 final SEQUENCE task
```
## File Structure
```
artdag/
├── artdag/
│ ├── analysis/
│ │ ├── __init__.py
│ │ ├── analyzer.py # Main Analyzer class
│ │ ├── audio.py # Audio feature extraction
│ │ └── video.py # Video feature extraction
│ ├── planning/
│ │ ├── __init__.py
│ │ ├── planner.py # RecipePlanner class
│ │ ├── schema.py # ExecutionPlan, ExecutionStep
│ │ └── tree_reduction.py # Parallel composition optimizer
│ └── execution/
│ ├── __init__.py
│ ├── executor.py # PlanExecutor class
│ └── claiming.py # Hash-based task claiming
art-celery/
├── tasks/
│ ├── __init__.py
│ ├── analyze.py # analyze_inputs task
│ ├── plan.py # generate_plan task
│ ├── execute.py # execute_step task
│ └── orchestrate.py # run_plan (coordinates all)
├── claiming.py # Redis Lua scripts
└── ...
```
## CLI Interface
```bash
# Full pipeline
artdag run-recipe recipes/beat-cuts/recipe.yaml \
-i music:abc123 \
-i videos:def456,ghi789
# Phase by phase
artdag analyze recipes/beat-cuts/recipe.yaml -i music:abc123
# → outputs analysis.json
artdag plan recipes/beat-cuts/recipe.yaml --analysis analysis.json
# → outputs plan.json
artdag execute plan.json
# → runs with caching, skips completed steps
# Dry run (show what would execute)
artdag execute plan.json --dry-run
# → shows which steps are cached vs need execution
```
## Benefits
1. **Development Speed**: Change recipe, re-run → only affected steps execute
2. **Parallelism**: Independent steps run on multiple Celery workers
3. **Reproducibility**: Same inputs + recipe = same cache IDs = same output
4. **Visibility**: Plan shows exactly what will happen before execution
5. **Cost Control**: Estimate compute before committing resources
6. **Fault Tolerance**: Failed runs resume from last successful step

View File

@@ -0,0 +1,443 @@
# IPFS-Primary Architecture (Sketch)
A simplified L1 architecture for large-scale distributed rendering where IPFS is the primary data store.
## Current vs Simplified
| Component | Current | Simplified |
|-----------|---------|------------|
| Local cache | Custom, per-worker | IPFS node handles it |
| Redis content_index | content_hash → node_id | Eliminated |
| Redis ipfs_index | content_hash → ipfs_cid | Eliminated |
| Step inputs | File paths | IPFS CIDs |
| Step outputs | File path + CID | Just CID |
| Cache lookup | Local → Redis → IPFS | Just IPFS |
## Core Principle
**Steps receive CIDs, produce CIDs. No file paths cross machine boundaries.**
```
Step input: [cid1, cid2, ...]
Step output: cid_out
```
## Worker Architecture
Each worker runs:
```
┌─────────────────────────────────────┐
│ Worker Node │
│ │
│ ┌───────────┐ ┌──────────────┐ │
│ │ Celery │────│ IPFS Node │ │
│ │ Worker │ │ (local) │ │
│ └───────────┘ └──────────────┘ │
│ │ │ │
│ │ ┌─────┴─────┐ │
│ │ │ Local │ │
│ │ │ Blockstore│ │
│ │ └───────────┘ │
│ │ │
│ ┌────┴────┐ │
│ │ /tmp │ (ephemeral workspace) │
│ └─────────┘ │
└─────────────────────────────────────┘
│ IPFS libp2p
┌─────────────┐
│ Other IPFS │
│ Nodes │
└─────────────┘
```
## Execution Flow
### 1. Plan Generation (unchanged)
```python
plan = planner.plan(recipe, input_hashes)
# plan.steps[].cache_id = deterministic hash
```
### 2. Input Registration
Before execution, register inputs with IPFS:
```python
input_cids = {}
for name, path in inputs.items():
cid = ipfs.add(path)
input_cids[name] = cid
# Plan now carries CIDs
plan.input_cids = input_cids
```
### 3. Step Execution
```python
@celery.task
def execute_step(step_json: str, input_cids: dict[str, str]) -> str:
"""Execute step, return output CID."""
step = ExecutionStep.from_json(step_json)
# Check if already computed (by cache_id as IPNS key or DHT lookup)
existing_cid = ipfs.resolve(f"/ipns/{step.cache_id}")
if existing_cid:
return existing_cid
# Fetch inputs from IPFS → local temp files
input_paths = []
for input_step_id in step.input_steps:
cid = input_cids[input_step_id]
path = ipfs.get(cid, f"/tmp/{cid}") # IPFS node caches automatically
input_paths.append(path)
# Execute
output_path = f"/tmp/{step.cache_id}.mkv"
executor = get_executor(step.node_type)
executor.execute(step.config, input_paths, output_path)
# Add output to IPFS
output_cid = ipfs.add(output_path)
# Publish cache_id → CID mapping (optional, for cache hits)
ipfs.name_publish(step.cache_id, output_cid)
# Cleanup temp files
cleanup_temp(input_paths + [output_path])
return output_cid
```
### 4. Orchestration
```python
@celery.task
def run_plan(plan_json: str) -> str:
"""Execute plan, return final output CID."""
plan = ExecutionPlan.from_json(plan_json)
# CID results accumulate as steps complete
cid_results = dict(plan.input_cids)
for level in plan.get_steps_by_level():
# Parallel execution within level
tasks = []
for step in level:
step_input_cids = {
sid: cid_results[sid]
for sid in step.input_steps
}
tasks.append(execute_step.s(step.to_json(), step_input_cids))
# Wait for level to complete
results = group(tasks).apply_async().get()
# Record output CIDs
for step, cid in zip(level, results):
cid_results[step.step_id] = cid
return cid_results[plan.output_step]
```
## What's Eliminated
### No more Redis indexes
```python
# BEFORE: Complex index management
self._set_content_index(content_hash, node_id) # Redis + local
self._set_ipfs_index(content_hash, ipfs_cid) # Redis + local
node_id = self._get_content_index(content_hash) # Check Redis, fallback local
# AFTER: Just CIDs
output_cid = ipfs.add(output_path)
return output_cid
```
### No more local cache management
```python
# BEFORE: Custom cache with entries, metadata, cleanup
cache.put(node_id, source_path, node_type, execution_time)
cache.get(node_id)
cache.has(node_id)
cache.cleanup_lru()
# AFTER: IPFS handles it
ipfs.add(path) # Store
ipfs.get(cid) # Retrieve (cached by IPFS node)
ipfs.pin(cid) # Keep permanently
ipfs.gc() # Cleanup unpinned
```
### No more content_hash vs node_id confusion
```python
# BEFORE: Two identifiers
content_hash = sha3_256(file_bytes) # What the file IS
node_id = cache_id # What computation produced it
# Need indexes to map between them
# AFTER: One identifier
cid = ipfs.add(file) # Content-addressed, includes hash
# CID IS the identifier
```
## Cache Hit Detection
Two options:
### Option A: IPNS (mutable names)
```python
# Publish: cache_id → CID
ipfs.name_publish(key=cache_id, value=output_cid)
# Lookup before executing
existing = ipfs.name_resolve(cache_id)
if existing:
return existing # Cache hit
```
### Option B: DHT record
```python
# Store in DHT: cache_id → CID
ipfs.dht_put(cache_id, output_cid)
# Lookup
existing = ipfs.dht_get(cache_id)
```
### Option C: Redis (minimal)
Keep Redis just for cache_id → CID mapping:
```python
# Store
redis.hset("artdag:cache", cache_id, output_cid)
# Lookup
existing = redis.hget("artdag:cache", cache_id)
```
This is simpler than current approach - one hash, one mapping, no content_hash/node_id confusion.
## Claiming (Preventing Duplicate Work)
Still need Redis for atomic claiming:
```python
# Claim before executing
claimed = redis.set(f"artdag:claim:{cache_id}", worker_id, nx=True, ex=300)
if not claimed:
# Another worker is doing it - wait for result
return wait_for_result(cache_id)
```
Or use IPFS pubsub for coordination.
## Data Flow Diagram
```
┌─────────────┐
│ Recipe │
│ + Inputs │
└──────┬──────┘
┌─────────────┐
│ Planner │
│ (compute │
│ cache_ids) │
└──────┬──────┘
┌─────────────────────────────────┐
│ ExecutionPlan │
│ - steps with cache_ids │
│ - input_cids (from ipfs.add) │
└─────────────────┬───────────────┘
┌────────────┼────────────┐
▼ ▼ ▼
┌────────┐ ┌────────┐ ┌────────┐
│Worker 1│ │Worker 2│ │Worker 3│
│ │ │ │ │ │
│ IPFS │◄──│ IPFS │◄──│ IPFS │
│ Node │──►│ Node │──►│ Node │
└───┬────┘ └───┬────┘ └───┬────┘
│ │ │
└────────────┼────────────┘
┌─────────────┐
│ Final CID │
│ (output) │
└─────────────┘
```
## Benefits
1. **Simpler code** - No custom cache, no dual indexes
2. **Automatic distribution** - IPFS handles replication
3. **Content verification** - CIDs are self-verifying
4. **Scalable** - Add workers = add IPFS nodes = more cache capacity
5. **Resilient** - Any node can serve any content
## Tradeoffs
1. **IPFS dependency** - Every worker needs IPFS node
2. **Initial fetch latency** - First fetch may be slower than local disk
3. **IPNS latency** - Name resolution can be slow (Option C avoids this)
## Trust Domains (Cluster Key)
Systems can share work through IPFS, but how do you trust them?
**Problem:** A malicious system could return wrong CIDs for computed steps.
**Solution:** Cluster key creates isolated trust domains:
```bash
export ARTDAG_CLUSTER_KEY="my-secret-shared-key"
```
**How it works:**
- The cluster key is mixed into all cache_id computations
- Systems with the same key produce the same cache_ids
- Systems with different keys have separate cache namespaces
- Only share the key with trusted partners
```
cache_id = SHA3-256(cluster_key + node_type + config + inputs)
```
**Trust model:**
| Scenario | Same Key? | Can Share Work? |
|----------|-----------|-----------------|
| Same organization | Yes | Yes |
| Trusted partner | Yes (shared) | Yes |
| Unknown system | No | No (different cache_ids) |
**Configuration:**
```yaml
# docker-compose.yml
environment:
- ARTDAG_CLUSTER_KEY=your-secret-key-here
```
**Programmatic:**
```python
from artdag.planning.schema import set_cluster_key
set_cluster_key("my-secret-key")
```
## Implementation
The simplified architecture is implemented in `art-celery/`:
| File | Purpose |
|------|---------|
| `hybrid_state.py` | Hybrid state manager (Redis + IPNS) |
| `tasks/execute_cid.py` | Step execution with CIDs |
| `tasks/analyze_cid.py` | Analysis with CIDs |
| `tasks/orchestrate_cid.py` | Full pipeline orchestration |
### Key Functions
**Registration (local → IPFS):**
- `register_input_cid(path)` → `{cid, content_hash}`
- `register_recipe_cid(path)` → `{cid, name, version}`
**Analysis:**
- `analyze_input_cid(input_cid, input_hash, features)` → `{analysis_cid}`
**Planning:**
- `generate_plan_cid(recipe_cid, input_cids, input_hashes, analysis_cids)` → `{plan_cid}`
**Execution:**
- `execute_step_cid(step_json, input_cids)` → `{cid}`
- `execute_plan_from_cid(plan_cid, input_cids)` → `{output_cid}`
**Full Pipeline:**
- `run_recipe_cid(recipe_cid, input_cids, input_hashes)` → `{output_cid, all_cids}`
- `run_from_local(recipe_path, input_paths)` → registers + runs
### Hybrid State Manager
For distributed L1 coordination, use the `HybridStateManager` which provides:
**Fast path (local Redis):**
- `get_cached_cid(cache_id)` / `set_cached_cid(cache_id, cid)` - microsecond lookups
- `try_claim(cache_id, worker_id)` / `release_claim(cache_id)` - atomic claiming
- `get_analysis_cid()` / `set_analysis_cid()` - analysis cache
- `get_plan_cid()` / `set_plan_cid()` - plan cache
- `get_run_cid()` / `set_run_cid()` - run cache
**Slow path (background IPNS sync):**
- Periodically syncs local state with global IPNS state (default: every 30s)
- Pulls new entries from remote nodes
- Pushes local updates to IPNS
**Configuration:**
```bash
# Enable IPNS sync
export ARTDAG_IPNS_SYNC=true
export ARTDAG_IPNS_SYNC_INTERVAL=30 # seconds
```
**Usage:**
```python
from hybrid_state import get_state_manager
state = get_state_manager()
# Fast local lookup
cid = state.get_cached_cid(cache_id)
# Fast local write (synced in background)
state.set_cached_cid(cache_id, output_cid)
# Atomic claim
if state.try_claim(cache_id, worker_id):
# We have the lock
...
```
**Trade-offs:**
- Local Redis: Fast (microseconds), single node
- IPNS sync: Slow (seconds), eventually consistent across nodes
- Duplicate work: Accepted (idempotent - same inputs → same CID)
### Redis Usage (minimal)
| Key | Type | Purpose |
|-----|------|---------|
| `artdag:cid_cache` | Hash | cache_id → output CID |
| `artdag:analysis_cache` | Hash | input_hash:features → analysis CID |
| `artdag:plan_cache` | Hash | plan_id → plan CID |
| `artdag:run_cache` | Hash | run_id → output CID |
| `artdag:claim:{cache_id}` | String | worker_id (TTL 5 min) |
## Migration Path
1. Keep current system working ✓
2. Add CID-based tasks ✓
- `execute_cid.py`
- `analyze_cid.py`
- `orchestrate_cid.py`
3. Add `--ipfs-primary` flag to CLI ✓
4. Add hybrid state manager for L1 coordination ✓
5. Gradually deprecate local cache code
6. Remove old tasks when CID versions are stable
## See Also
- [L1_STORAGE.md](L1_STORAGE.md) - Current L1 architecture
- [EXECUTION_MODEL.md](EXECUTION_MODEL.md) - 3-phase model

181
core/docs/L1_STORAGE.md Normal file
View File

@@ -0,0 +1,181 @@
# L1 Distributed Storage Architecture
This document describes how data is stored when running artdag on L1 (the distributed rendering layer).
## Overview
L1 uses four storage systems working together:
| System | Purpose | Data Stored |
|--------|---------|-------------|
| **Local Cache** | Hot storage (fast access) | Media files, plans, analysis |
| **IPFS** | Durable content-addressed storage | All media outputs |
| **Redis** | Coordination & indexes | Claims, mappings, run status |
| **PostgreSQL** | Metadata & ownership | User data, provenance |
## Storage Flow
When a step executes on L1:
```
1. Executor produces output file
2. Store in local cache (fast)
3. Compute content_hash = SHA3-256(file)
4. Upload to IPFS → get ipfs_cid
5. Update indexes:
- content_hash → node_id (Redis + local)
- content_hash → ipfs_cid (Redis + local)
```
Every intermediate step output (SEGMENT, SEQUENCE, etc.) gets its own IPFS CID.
## Local Cache
Hot storage on each worker node:
```
cache_dir/
index.json # Cache metadata
content_index.json # content_hash → node_id
ipfs_index.json # content_hash → ipfs_cid
plans/
{plan_id}.json # Cached execution plans
analysis/
{hash}.json # Analysis results
{node_id}/
output.mkv # Media output
metadata.json # CacheEntry metadata
```
## IPFS - Durable Media Storage
All media files are stored in IPFS for durability and content-addressing.
**Supported pinning providers:**
- Pinata
- web3.storage
- NFT.Storage
- Infura IPFS
- Filebase (S3-compatible)
- Storj (decentralized)
- Local IPFS node
**Configuration:**
```bash
IPFS_API=/ip4/127.0.0.1/tcp/5001 # Local IPFS daemon
```
## Redis - Coordination
Redis handles distributed coordination across workers.
### Key Patterns
| Key | Type | Purpose |
|-----|------|---------|
| `artdag:run:{run_id}` | String | Run status, timestamps, celery task ID |
| `artdag:content_index` | Hash | content_hash → node_id mapping |
| `artdag:ipfs_index` | Hash | content_hash → ipfs_cid mapping |
| `artdag:claim:{cache_id}` | String | Task claiming (prevents duplicate work) |
### Task Claiming
Lua scripts ensure atomic claiming across workers:
```
Status flow: PENDING → CLAIMED → RUNNING → COMPLETED/CACHED/FAILED
TTL: 5 minutes for claims, 1 hour for results
```
This prevents two workers from executing the same step.
## PostgreSQL - Metadata
Stores ownership, provenance, and sharing metadata.
### Tables
```sql
-- Core cache (shared)
cache_items (content_hash, ipfs_cid, created_at)
-- Per-user ownership
item_types (content_hash, actor_id, type, metadata)
-- Run cache (deterministic identity)
run_cache (
run_id, -- SHA3-256(sorted_inputs + recipe)
output_hash,
ipfs_cid,
provenance_cid,
recipe, inputs, actor_id
)
-- Storage backends
storage_backends (actor_id, provider_type, config, capacity_gb)
-- What's stored where
storage_pins (content_hash, storage_id, ipfs_cid, pin_type)
```
## Cache Lookup Flow
When a worker needs a file:
```
1. Check local cache by cache_id (fastest)
2. Check Redis content_index: content_hash → node_id
3. Check PostgreSQL cache_items
4. Retrieve from IPFS by CID
5. Store in local cache for next hit
```
## Local vs L1 Comparison
| Feature | Local Testing | L1 Distributed |
|---------|---------------|----------------|
| Local cache | Yes | Yes |
| IPFS | No | Yes |
| Redis | No | Yes |
| PostgreSQL | No | Yes |
| Multi-worker | No | Yes |
| Task claiming | No | Yes (Lua scripts) |
| Durability | Filesystem only | IPFS + PostgreSQL |
## Content Addressing
All storage uses SHA3-256 (quantum-resistant):
- **Files:** `content_hash = SHA3-256(file_bytes)`
- **Computation:** `cache_id = SHA3-256(type + config + input_hashes)`
- **Run identity:** `run_id = SHA3-256(sorted_inputs + recipe)`
- **Plans:** `plan_id = SHA3-256(recipe + inputs + analysis)`
This ensures:
- Same inputs → same outputs (reproducibility)
- Automatic deduplication across workers
- Content verification (tamper detection)
## Configuration
Default locations:
```bash
# Local cache
~/.artdag/cache # Default
/data/cache # Docker
# Redis
redis://localhost:6379/5
# PostgreSQL
postgresql://user:pass@host/artdag
# IPFS
/ip4/127.0.0.1/tcp/5001
```
## See Also
- [OFFLINE_TESTING.md](OFFLINE_TESTING.md) - Local testing without L1
- [EXECUTION_MODEL.md](EXECUTION_MODEL.md) - 3-phase execution model

View File

@@ -0,0 +1,211 @@
# Offline Testing Strategy
This document describes how to test artdag locally without requiring Redis, IPFS, Celery, or any external distributed infrastructure.
## Overview
The artdag system uses a **3-Phase Execution Model** that enables complete offline testing:
1. **Analysis** - Extract features from input media
2. **Planning** - Generate deterministic execution plan with pre-computed cache IDs
3. **Execution** - Run plan steps, skipping cached results
This separation allows testing each phase independently and running full pipelines locally.
## Quick Start
Run a full offline test with a video file:
```bash
./examples/test_local.sh ../artdag-art-source/dog.mkv
```
This will:
1. Compute the SHA3-256 hash of the input video
2. Run the `simple_sequence` recipe
3. Store all outputs in `test_cache/`
## Test Scripts
### `test_local.sh` - Full Pipeline Test
Location: `./examples/test_local.sh`
Runs the complete artdag pipeline offline with a real video file.
**Usage:**
```bash
./examples/test_local.sh <video_file>
```
**Example:**
```bash
./examples/test_local.sh ../artdag-art-source/dog.mkv
```
**What it does:**
- Computes content hash of input video
- Runs `artdag run-recipe` with `simple_sequence.yaml`
- Stores outputs in `test_cache/` directory
- No external services required
### `test_plan.py` - Planning Phase Test
Location: `./examples/test_plan.py`
Tests the planning phase without requiring any media files.
**Usage:**
```bash
python3 examples/test_plan.py
```
**What it tests:**
- Recipe loading and YAML parsing
- Execution plan generation
- Cache ID computation (deterministic)
- Multi-level parallel step organization
- Human-readable step names
- Multi-output support
**Output:**
- Prints plan structure to console
- Saves full plan to `test_plan_output.json`
### `simple_sequence.yaml` - Sample Recipe
Location: `./examples/simple_sequence.yaml`
A simple recipe for testing that:
- Takes a video input
- Extracts two segments (0-2s and 5-7s)
- Concatenates them with SEQUENCE
## Test Outputs
All test outputs are stored locally and git-ignored:
| Output | Description |
|--------|-------------|
| `test_cache/` | Cached execution results (media files, analysis, plans) |
| `test_cache/plans/` | Cached execution plans by plan_id |
| `test_cache/analysis/` | Cached analysis results by input hash |
| `test_plan_output.json` | Generated execution plan from `test_plan.py` |
## Unit Tests
The project includes a comprehensive pytest test suite in `tests/`:
```bash
# Run all unit tests
pytest
# Run specific test file
pytest tests/test_dag.py
pytest tests/test_engine.py
pytest tests/test_cache.py
```
## Testing Each Phase
### Phase 1: Analysis Only
Extract features without full execution:
```bash
python3 -m artdag.cli analyze <recipe> -i <name>:<hash>@<path> --features beats,energy
```
### Phase 2: Planning Only
Generate an execution plan (no media needed):
```bash
python3 -m artdag.cli plan <recipe> -i <name>:<hash>
```
Or use the test script:
```bash
python3 examples/test_plan.py
```
### Phase 3: Execution Only
Execute a pre-generated plan:
```bash
python3 -m artdag.cli execute plan.json
```
With dry-run to see what would execute:
```bash
python3 -m artdag.cli execute plan.json --dry-run
```
## Key Testing Features
### Content Addressing
All nodes have deterministic IDs computed as:
```
SHA3-256(type + config + sorted(input_IDs))
```
Same inputs always produce same cache IDs, enabling:
- Reproducibility across runs
- Automatic deduplication
- Incremental execution (only changed steps run)
### Local Caching
The `test_cache/` directory stores:
- `plans/{plan_id}.json` - Execution plans (deterministic hash of recipe + inputs + analysis)
- `analysis/{hash}.json` - Analysis results (audio beats, tempo, energy)
- `{cache_id}/output.mkv` - Media outputs from each step
Subsequent test runs automatically skip cached steps. Plans are cached by their `plan_id`, which is a SHA3-256 hash of the recipe, input hashes, and analysis results - so the same recipe with the same inputs always produces the same plan.
### No External Dependencies
Offline testing requires:
- Python 3.9+
- ffmpeg (for media processing)
- No Redis, IPFS, Celery, or network access
## Debugging Tips
1. **Check cache contents:**
```bash
ls -la test_cache/
ls -la test_cache/plans/
```
2. **View cached plan:**
```bash
cat test_cache/plans/*.json | python3 -m json.tool | head -50
```
3. **View execution plan structure:**
```bash
cat test_plan_output.json | python3 -m json.tool
```
4. **Run with verbose output:**
```bash
python3 -m artdag.cli run-recipe examples/simple_sequence.yaml \
-i "video:HASH@path" \
--cache-dir test_cache \
-v
```
5. **Dry-run to see what would execute:**
```bash
python3 -m artdag.cli execute plan.json --dry-run
```
## See Also
- [L1_STORAGE.md](L1_STORAGE.md) - Distributed storage on L1 (IPFS, Redis, PostgreSQL)
- [EXECUTION_MODEL.md](EXECUTION_MODEL.md) - 3-phase execution model

View File

@@ -0,0 +1,35 @@
# Identity Effect
The identity effect returns its input unchanged. It serves as the foundational primitive in the effects registry.
## Purpose
- **Testing**: Verify the effects pipeline is working correctly
- **No-op placeholder**: Use when an effect slot requires a value but no transformation is needed
- **Composition base**: The neutral element for effect composition
## Signature
```
identity(input) → input
```
## Properties
- **Idempotent**: `identity(identity(x)) = identity(x)`
- **Neutral**: For any effect `f`, `identity ∘ f = f ∘ identity = f`
## Implementation
```python
def identity(input):
return input
```
## Content Hash
The identity effect is content-addressed by its behavior: given any input, the output hash equals the input hash.
## Owner
Registered by `@giles@artdag.rose-ash.com`

View File

@@ -0,0 +1,2 @@
# Identity effect has no dependencies
# It's a pure function: identity(x) = x

View File

@@ -0,0 +1,42 @@
# Simple sequence recipe - concatenates segments from a single input video
name: simple_sequence
version: "1.0"
description: "Split input into segments and concatenate them"
owner: test@local
dag:
nodes:
# Input source - variable (provided at runtime)
- id: video
type: SOURCE
config:
input: true
name: "Input Video"
description: "The video to process"
# Extract first 2 seconds
- id: seg1
type: SEGMENT
config:
start: 0.0
end: 2.0
inputs:
- video
# Extract seconds 5-7
- id: seg2
type: SEGMENT
config:
start: 5.0
end: 7.0
inputs:
- video
# Concatenate the segments
- id: output
type: SEQUENCE
inputs:
- seg1
- seg2
output: output

54
core/examples/test_local.sh Executable file
View File

@@ -0,0 +1,54 @@
#!/bin/bash
# Local testing script for artdag
# Tests the 3-phase execution without Redis/IPFS
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ARTDAG_DIR="$(dirname "$SCRIPT_DIR")"
CACHE_DIR="${ARTDAG_DIR}/test_cache"
RECIPE="${SCRIPT_DIR}/simple_sequence.yaml"

# Check for input video
if [ -z "$1" ]; then
    echo "Usage: $0 <video_file>"
    echo ""
    echo "Example:"
    echo "  $0 /path/to/test_video.mp4"
    exit 1
fi

VIDEO_PATH="$1"
if [ ! -f "$VIDEO_PATH" ]; then
    echo "Error: Video file not found: $VIDEO_PATH"
    exit 1
fi

# Compute content hash of input.
# The path is passed to Python via argv rather than interpolated into the
# source text, so paths containing quotes, spaces, or backslashes cannot
# break (or inject into) the inline script. The file is hashed in chunks
# so large videos are not loaded into memory at once.
echo "=== Computing input hash ==="
VIDEO_HASH=$(python3 -c "
import hashlib, sys
h = hashlib.sha3_256()
with open(sys.argv[1], 'rb') as f:
    for chunk in iter(lambda: f.read(1 << 20), b''):
        h.update(chunk)
print(h.hexdigest())
" "$VIDEO_PATH")
echo "Input hash: ${VIDEO_HASH:0:16}..."

# Change to artdag directory
cd "$ARTDAG_DIR"

# Run the full pipeline
echo ""
echo "=== Running artdag run-recipe ==="
echo "Recipe: $RECIPE"
echo "Input: video:${VIDEO_HASH:0:16}...@$VIDEO_PATH"
echo "Cache: $CACHE_DIR"
echo ""
python3 -m artdag.cli run-recipe "$RECIPE" \
    -i "video:${VIDEO_HASH}@${VIDEO_PATH}" \
    --cache-dir "$CACHE_DIR"

echo ""
echo "=== Done ==="
echo "Cache directory: $CACHE_DIR"
echo "Use 'ls -la $CACHE_DIR' to see cached outputs"

93
core/examples/test_plan.py Executable file
View File

@@ -0,0 +1,93 @@
#!/usr/bin/env python3
"""
Test the planning phase locally.
This tests the new human-readable names and multi-output support
without requiring actual video files or execution.
"""
import hashlib
import json
import sys
from pathlib import Path
# Add artdag to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from artdag.planning import RecipePlanner, Recipe, ExecutionPlan
def main():
    """Exercise the planning phase end-to-end without media or execution.

    Loads the sample recipe, plans it against a fake input hash, prints the
    resulting levels/steps, and saves the plan JSON for inspection.

    Returns:
        0 on success, 1 if the sample recipe file is missing.
    """
    # Load recipe
    recipe_path = Path(__file__).parent / "simple_sequence.yaml"
    if not recipe_path.exists():
        print(f"Recipe not found: {recipe_path}")
        return 1
    recipe = Recipe.from_file(recipe_path)
    print(f"Recipe: {recipe.name} v{recipe.version}")
    print(f"Nodes: {len(recipe.nodes)}")
    print()
    # Fake input hash (would be real content hash in production) — planning
    # only needs a hash, never the actual media bytes.
    fake_input_hash = hashlib.sha3_256(b"fake video content").hexdigest()
    input_hashes = {"video": fake_input_hash}
    print(f"Input: video -> {fake_input_hash[:16]}...")
    print()
    # Generate plan
    planner = RecipePlanner(use_tree_reduction=True)
    plan = planner.plan(
        recipe=recipe,
        input_hashes=input_hashes,
        seed=42,  # Optional seed for reproducibility
    )
    print("=== Generated Plan ===")
    print(f"Plan ID: {plan.plan_id[:24]}...")
    print(f"Plan Name: {plan.name}")
    print(f"Recipe Name: {plan.recipe_name}")
    print(f"Output: {plan.output_name}")
    print(f"Steps: {len(plan.steps)}")
    print()
    # Show steps grouped by execution level; steps sharing a level have no
    # dependencies on each other and could run in parallel.
    steps_by_level = plan.get_steps_by_level()
    for level in sorted(steps_by_level.keys()):
        steps = steps_by_level[level]
        print(f"Level {level}: {len(steps)} step(s)")
        for step in steps:
            # Show human-readable name, falling back to a truncated step id.
            name = step.name or step.step_id[:20]
            print(f"  - {name}")
            print(f"    Type: {step.node_type}")
            print(f"    Cache ID: {step.cache_id[:16]}...")
            if step.outputs:
                print(f"    Outputs: {len(step.outputs)}")
                for out in step.outputs:
                    print(f"      - {out.name} ({out.media_type})")
            if step.inputs:
                print(f"    Inputs: {[inp.name for inp in step.inputs]}")
        print()
    # Save plan for inspection
    plan_path = Path(__file__).parent.parent / "test_plan_output.json"
    with open(plan_path, "w") as f:
        f.write(plan.to_json())
    print(f"Plan saved to: {plan_path}")
    # Show plan JSON structure
    print()
    print("=== Plan JSON Preview ===")
    plan_dict = json.loads(plan.to_json())
    # Show first step as example (truncated preview only)
    if plan_dict.get("steps"):
        first_step = plan_dict["steps"][0]
        print(json.dumps(first_step, indent=2)[:500] + "...")
    return 0


if __name__ == "__main__":
    sys.exit(main())

62
core/pyproject.toml Normal file
View File

@@ -0,0 +1,62 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "artdag"
version = "0.1.0"
description = "Content-addressed DAG execution engine with ActivityPub ownership"
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.10"
authors = [
{name = "Giles", email = "giles@rose-ash.com"}
]
keywords = ["dag", "content-addressed", "activitypub", "video", "processing"]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
dependencies = [
"cryptography>=41.0.0",
]
[project.optional-dependencies]
dev = [
"pytest>=7.0.0",
"pytest-asyncio>=0.21.0",
]
analysis = [
"librosa>=0.10.0",
"numpy>=1.24.0",
"pyyaml>=6.0",
]
cv = [
"opencv-python>=4.8.0",
]
all = [
"librosa>=0.10.0",
"numpy>=1.24.0",
"pyyaml>=6.0",
"opencv-python>=4.8.0",
]
[project.scripts]
artdag = "artdag.cli:main"
[project.urls]
Homepage = "https://artdag.rose-ash.com"
Repository = "https://github.com/giles/artdag"
[tool.setuptools.packages.find]
where = ["."]
include = ["artdag*"]
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env python3
"""
Compute content hash of a git repository.
Hashes all tracked files (respects .gitignore) in sorted order.
"""
import hashlib
import subprocess
import sys
from pathlib import Path
def repo_hash(repo_path: Path) -> str:
    """
    Compute the SHA3-256 hash of all tracked files in a git repository.

    Uses ``git ls-files -z`` so the listing respects .gitignore and is
    unambiguous even for filenames containing newlines (plain newline
    splitting would corrupt the file list for such names). Files are
    hashed in sorted order for determinism; each file contributes its
    relative path followed by its contents.

    Args:
        repo_path: Root of the git working tree to hash.

    Returns:
        Hex-encoded SHA3-256 digest of the tracked tree.

    Raises:
        subprocess.CalledProcessError: If ``git ls-files`` fails, e.g.
            *repo_path* is not inside a git repository.
    """
    # -z terminates entries with NUL, the one byte that cannot appear in a
    # POSIX path, so splitting is always safe.
    result = subprocess.run(
        ["git", "ls-files", "-z"],
        cwd=repo_path,
        capture_output=True,
        text=True,
        check=True,
    )
    files = sorted(p for p in result.stdout.split("\0") if p)
    hasher = hashlib.sha3_256()
    for rel_path in files:
        file_path = repo_path / rel_path
        # Skip entries tracked but absent from the working tree (e.g.
        # deleted-but-uncommitted files) or that are not regular files.
        if not file_path.is_file():
            continue
        # Include path in hash so renames change the digest even when the
        # file contents are identical.
        hasher.update(rel_path.encode())
        # Stream contents in 64 KiB chunks to bound memory usage.
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(65536), b""):
                hasher.update(chunk)
    return hasher.hexdigest()
def main():
    """CLI entry point: hash the repo given as argv[1] (defaults to cwd)."""
    target = Path(sys.argv[1]) if len(sys.argv) > 1 else Path.cwd()
    digest = repo_hash(target)
    print(f"Repository: {target}")
    print(f"Hash: {digest}")
    return digest


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,82 @@
#!/bin/bash
# Install ffglitch for datamosh effects
# Usage: ./install-ffglitch.sh [install_dir]
set -e

FFGLITCH_VERSION="0.10.2"
INSTALL_DIR="${1:-/usr/local/bin}"

# Detect architecture
ARCH=$(uname -m)
case "$ARCH" in
    x86_64)
        URL="https://ffglitch.org/pub/bin/linux64/ffglitch-${FFGLITCH_VERSION}-linux-x86_64.zip"
        ARCHIVE="ffglitch.zip"
        ;;
    aarch64)
        URL="https://ffglitch.org/pub/bin/linux-aarch64/ffglitch-${FFGLITCH_VERSION}-linux-aarch64.7z"
        ARCHIVE="ffglitch.7z"
        ;;
    *)
        echo "Unsupported architecture: $ARCH"
        exit 1
        ;;
esac

echo "Installing ffglitch ${FFGLITCH_VERSION} for ${ARCH}..."

# Create temp directory. Use a dedicated name (not TMPDIR, which is a
# special env var consulted by mktemp/curl/7z) and clean it up on ANY
# exit, including failures part-way through.
WORKDIR=$(mktemp -d)
trap 'rm -rf "$WORKDIR"' EXIT
cd "$WORKDIR"

# Download
echo "Downloading from ${URL}..."
curl -L -o "$ARCHIVE" "$URL"

# Extract
echo "Extracting..."
if [[ "$ARCHIVE" == *.zip ]]; then
    unzip -q "$ARCHIVE"
elif [[ "$ARCHIVE" == *.7z ]]; then
    # Requires p7zip
    if ! command -v 7z &> /dev/null; then
        echo "7z not found. Install with: apt install p7zip-full"
        exit 1
    fi
    7z x "$ARCHIVE" > /dev/null
fi

# Find and install binaries. IFS= and read -r keep paths with leading
# whitespace or backslashes intact; -type f avoids matching directories.
echo "Installing to ${INSTALL_DIR}..."
find . -type f \( -name "ffgac" -o -name "ffedit" \) | while IFS= read -r bin; do
    chmod +x "$bin"
    if [ -w "$INSTALL_DIR" ]; then
        cp "$bin" "$INSTALL_DIR/"
    else
        sudo cp "$bin" "$INSTALL_DIR/"
    fi
    echo "  Installed: $(basename "$bin")"
done

# Verify
echo ""
echo "Verifying installation..."
if command -v ffgac &> /dev/null; then
    echo "ffgac: $(which ffgac)"
else
    echo "Warning: ffgac not in PATH. Add ${INSTALL_DIR} to PATH."
fi
if command -v ffedit &> /dev/null; then
    echo "ffedit: $(which ffedit)"
else
    echo "Warning: ffedit not in PATH. Add ${INSTALL_DIR} to PATH."
fi

echo ""
echo "Done! ffglitch installed."

View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python3
"""
Register the identity effect owned by giles.
"""
import hashlib
from pathlib import Path
import sys
# Add parent to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from artdag.activitypub.ownership import OwnershipManager
def folder_hash(folder: Path) -> str:
    """
    Compute a deterministic SHA3-256 digest over an entire folder.

    Files are visited in sorted order; each file contributes its relative
    path followed by its raw contents, so both the directory structure and
    the data are covered by the digest.
    """
    digest = hashlib.sha3_256()
    for entry in sorted(folder.rglob("*")):
        if not entry.is_file():
            continue
        # Bind each file's bytes to its location so moves/renames change
        # the resulting hash.
        digest.update(str(entry.relative_to(folder)).encode())
        # Stream in 64 KiB blocks to keep memory bounded for large assets.
        with open(entry, "rb") as fh:
            while True:
                block = fh.read(65536)
                if not block:
                    break
                digest.update(block)
    return digest.hexdigest()
def main():
    """Register the identity effect folder as an asset owned by giles.

    Creates (or reuses) the giles actor, content-addresses the effect
    folder, registers it through the OwnershipManager, and verifies the
    resulting ownership claim.
    """
    # Use .cache as the ownership data directory
    base_dir = Path(__file__).parent.parent / ".cache" / "ownership"
    manager = OwnershipManager(base_dir)
    # Create or get giles actor — idempotent across repeated runs.
    actor = manager.get_actor("giles")
    if not actor:
        actor = manager.create_actor("giles", "Giles Bradshaw")
        print(f"Created actor: {actor.handle}")
    else:
        print(f"Using existing actor: {actor.handle}")
    # Register the identity effect folder, content-addressed via folder_hash
    # so the claim is tied to the exact bytes of the effect.
    effect_path = Path(__file__).parent.parent / "effects" / "identity"
    cid = folder_hash(effect_path)
    asset, activity = manager.register_asset(
        actor=actor,
        name="effect:identity",
        cid=cid,
        local_path=effect_path,
        tags=["effect", "primitive", "identity"],
        metadata={
            "type": "effect",
            "description": "The identity effect - returns input unchanged",
            "signature": "identity(input) → input",
        },
    )
    print(f"\nRegistered: {asset.name}")
    print(f" Hash: {asset.cid}")
    print(f" Path: {asset.local_path}")
    print(f" Activity: {activity.activity_id}")
    print(f" Owner: {actor.handle}")
    # Verify the ownership claim round-trips through the signed record.
    verified = manager.verify_ownership(asset.name, actor)
    print(f" Ownership verified: {verified}")


if __name__ == "__main__":
    main()

120
core/scripts/setup_actor.py Normal file
View File

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
"""
Set up actor with keypair stored securely.
Private key: ~/.artdag/keys/{username}.pem
Public key: exported for registry
"""
import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
# Add artdag to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.hazmat.backends import default_backend
def create_keypair():
    """Generate and return a fresh RSA-2048 private key.

    65537 is the conventional public exponent; 2048 bits is the common
    baseline for ActivityPub HTTP signatures.
    """
    return rsa.generate_private_key(
        public_exponent=65537,
        key_size=2048,
        backend=default_backend(),
    )
def save_private_key(private_key, path: Path):
    """Serialize *private_key* as PKCS#8 PEM at *path* and return the PEM text.

    The file is created with mode 600 so the secret is readable only by
    its owner.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    pem_bytes = private_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=serialization.NoEncryption(),
    )
    path.write_bytes(pem_bytes)
    # Lock down permissions: private keys must not be group/world readable.
    os.chmod(path, 0o600)
    return pem_bytes.decode()
def get_public_key_pem(private_key) -> str:
    """Return the PEM-encoded SubjectPublicKeyInfo of *private_key*'s public half."""
    pem_bytes = private_key.public_key().public_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PublicFormat.SubjectPublicKeyInfo,
    )
    return pem_bytes.decode()
def create_actor_json(username: str, display_name: str, public_key_pem: str, domain: str = "artdag.rose-ash.com"):
    """Build an ActivityPub Person document with an attached public key.

    The actor id, inbox, outbox, and key id are all derived from the same
    https://{domain}/users/{username} base URL.
    """
    actor_id = f"https://{domain}/users/{username}"
    return {
        "@context": [
            "https://www.w3.org/ns/activitystreams",
            "https://w3id.org/security/v1"
        ],
        "type": "Person",
        "id": actor_id,
        "preferredUsername": username,
        "name": display_name,
        "inbox": f"{actor_id}/inbox",
        "outbox": f"{actor_id}/outbox",
        "publicKey": {
            "id": f"{actor_id}#main-key",
            "owner": actor_id,
            "publicKeyPem": public_key_pem
        }
    }
def main():
    """Generate a keypair for the giles actor and emit the actor JSON.

    Saves the private key to ~/.artdag/keys/ with mode 600, then writes
    the ActivityPub actor document to the local registry.
    """
    username = "giles"
    display_name = "Giles Bradshaw"
    domain = "artdag.rose-ash.com"
    keys_dir = Path.home() / ".artdag" / "keys"
    private_key_path = keys_dir / f"{username}.pem"
    # Refuse to overwrite an existing key: regenerating would invalidate
    # every signature made with the old key.
    if private_key_path.exists():
        print(f"Private key already exists: {private_key_path}")
        print("Delete it first if you want to regenerate.")
        sys.exit(1)
    # Create new keypair
    print(f"Creating new keypair for @{username}@{domain}...")
    private_key = create_keypair()
    # Save private key
    save_private_key(private_key, private_key_path)
    print(f"Private key saved: {private_key_path}")
    print(f" Mode: 600 (owner read/write only)")
    print(f" BACK THIS UP!")
    # Get public key
    public_key_pem = get_public_key_pem(private_key)
    # Create actor JSON
    actor = create_actor_json(username, display_name, public_key_pem, domain)
    # Output actor JSON
    actor_json = json.dumps(actor, indent=2)
    print(f"\nActor JSON (for registry/actors/{username}.json):")
    print(actor_json)
    # Save to registry so other tooling (e.g. sign_assets.py) can find it.
    registry_path = Path.home() / "artdag-registry" / "actors" / f"{username}.json"
    registry_path.parent.mkdir(parents=True, exist_ok=True)
    registry_path.write_text(actor_json)
    print(f"\nSaved to: {registry_path}")


if __name__ == "__main__":
    main()

143
core/scripts/sign_assets.py Normal file
View File

@@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""
Sign assets in the registry with giles's private key.
Creates ActivityPub Create activities with RSA signatures.
"""
import base64
import hashlib
import json
import sys
import uuid
from datetime import datetime, timezone
from pathlib import Path
from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import padding
from cryptography.hazmat.backends import default_backend
def load_private_key(path: Path):
    """Deserialize an unencrypted PEM private key from *path*."""
    return serialization.load_pem_private_key(
        path.read_bytes(), password=None, backend=default_backend()
    )
def sign_data(private_key, data: str) -> str:
    """RSA-sign *data* (PKCS#1 v1.5 padding, SHA-256) and return base64 text."""
    raw_signature = private_key.sign(
        data.encode(),
        padding.PKCS1v15(),
        hashes.SHA256(),
    )
    return base64.b64encode(raw_signature).decode()
def create_activity(actor_id: str, asset_name: str, cid: str, asset_type: str, domain: str = "artdag.rose-ash.com"):
    """Build an (unsigned) ActivityPub Create activity describing an asset.

    The object carries a SHA3-256 content hash and is attributed to the
    owning actor; the activity id is a fresh UUID.
    """
    published = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    object_data = {
        "type": asset_type_to_ap(asset_type),
        "name": asset_name,
        "id": f"https://{domain}/objects/{cid}",
        "contentHash": {
            "algorithm": "sha3-256",
            "value": cid
        },
        "attributedTo": actor_id
    }
    return {
        "activity_id": str(uuid.uuid4()),
        "activity_type": "Create",
        "actor_id": actor_id,
        "object_data": object_data,
        "published": published,
    }
def asset_type_to_ap(asset_type: str) -> str:
    """Map an internal asset type to the corresponding ActivityPub object type."""
    if asset_type == "image":
        return "Image"
    if asset_type == "video":
        return "Video"
    if asset_type == "audio":
        return "Audio"
    if asset_type in ("effect", "infrastructure"):
        # Executable/tooling assets are modelled as AP Applications.
        return "Application"
    # Anything unrecognized falls back to a generic Document.
    return "Document"
def sign_activity(activity: dict, private_key, actor_id: str, domain: str = "artdag.rose-ash.com") -> dict:
    """Attach an RsaSignature2017 block to *activity* (in place) and return it.

    Only the object payload is signed; it is canonicalized with sorted keys
    and no whitespace so the signature is independent of dict ordering.
    """
    canonical = json.dumps(activity["object_data"], sort_keys=True, separators=(",", ":"))
    activity["signature"] = {
        "type": "RsaSignature2017",
        "creator": f"{actor_id}#main-key",
        "created": activity["published"],
        "signatureValue": sign_data(private_key, canonical),
    }
    return activity
def main():
    """Sign every asset in the local registry with giles's private key.

    Loads the key from ~/.artdag/keys/, builds a signed Create activity
    per registry asset, and writes them all to activities.json.
    """
    username = "giles"
    domain = "artdag.rose-ash.com"
    actor_id = f"https://{domain}/users/{username}"
    # Load private key — must have been created by setup_actor.py first.
    private_key_path = Path.home() / ".artdag" / "keys" / f"{username}.pem"
    if not private_key_path.exists():
        print(f"Private key not found: {private_key_path}")
        print("Run setup_actor.py first.")
        sys.exit(1)
    private_key = load_private_key(private_key_path)
    print(f"Loaded private key: {private_key_path}")
    # Load registry
    registry_path = Path.home() / "artdag-registry" / "registry.json"
    with open(registry_path) as f:
        registry = json.load(f)
    # Create signed activities for each asset
    activities = []
    for asset_name, asset_data in registry["assets"].items():
        print(f"\nSigning: {asset_name}")
        print(f" Hash: {asset_data['cid'][:16]}...")
        activity = create_activity(
            actor_id=actor_id,
            asset_name=asset_name,
            cid=asset_data["cid"],
            asset_type=asset_data["asset_type"],
            domain=domain,
        )
        signed_activity = sign_activity(activity, private_key, actor_id, domain)
        activities.append(signed_activity)
        print(f" Activity ID: {signed_activity['activity_id']}")
        print(f" Signature: {signed_activity['signature']['signatureValue'][:32]}...")
    # Save activities alongside the registry for later verification/serving.
    activities_path = Path.home() / "artdag-registry" / "activities.json"
    activities_data = {
        "version": "1.0",
        "activities": activities
    }
    with open(activities_path, "w") as f:
        json.dump(activities_data, f, indent=2)
    print(f"\nSaved {len(activities)} signed activities to: {activities_path}")


if __name__ == "__main__":
    main()

1
core/tests/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Tests for new standalone primitive engine

View File

@@ -0,0 +1,613 @@
# tests/test_activities.py
"""Tests for the activity tracking and cache deletion system."""
import tempfile
import time
from pathlib import Path
import pytest
from artdag import Cache, DAG, Node, NodeType
from artdag.activities import Activity, ActivityStore, ActivityManager, make_is_shared_fn
class MockActivityPubStore:
    """In-memory stand-in for the ActivityPub store used by is_shared checks."""

    def __init__(self):
        # Content hashes that have been "published" via ActivityPub.
        self._shared_hashes = set()

    def mark_shared(self, cid: str):
        """Record *cid* as shared (published)."""
        self._shared_hashes.add(cid)

    def find_by_object_hash(self, cid: str):
        """Return one mock Create activity when *cid* is shared, else an empty list."""
        if cid not in self._shared_hashes:
            return []
        return [MockActivity("Create")]
class MockActivity:
    """Minimal stand-in for an ActivityPub activity record."""

    def __init__(self, activity_type: str):
        # e.g. "Create" — the only attribute the code under test inspects.
        self.activity_type = activity_type
@pytest.fixture
def temp_dir():
    """Yield a temporary directory as a Path; removed automatically after the test."""
    with tempfile.TemporaryDirectory() as tmpdir:
        yield Path(tmpdir)
@pytest.fixture
def cache(temp_dir):
    """Create a Cache instance rooted inside the per-test temp directory."""
    return Cache(temp_dir / "cache")
@pytest.fixture
def activity_store(temp_dir):
    """Create an ActivityStore persisting inside the per-test temp directory."""
    return ActivityStore(temp_dir / "activities")
@pytest.fixture
def ap_store():
    """Create a fresh mock ActivityPub store (no hashes shared initially)."""
    return MockActivityPubStore()
@pytest.fixture
def manager(cache, activity_store, ap_store):
    """Create an ActivityManager wired to the test cache, activity store,
    and mock ActivityPub store (via make_is_shared_fn)."""
    return ActivityManager(
        cache=cache,
        activity_store=activity_store,
        is_shared_fn=make_is_shared_fn(ap_store),
    )
def create_test_file(path: Path, content: str = "test content") -> Path:
    """Write *content* to *path*, creating parent directories, and return *path*."""
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    path.write_text(content)
    return path
class TestCacheEntryContentHash:
    """Tests for cid in CacheEntry."""

    def test_put_computes_cid(self, cache, temp_dir):
        """put() should compute and store cid."""
        test_file = create_test_file(temp_dir / "input.txt", "hello world")
        cache.put("node1", test_file, "test")
        entry = cache.get_entry("node1")
        assert entry is not None
        assert entry.cid != ""
        assert len(entry.cid) == 64  # SHA-3-256 hex

    def test_same_content_same_hash(self, cache, temp_dir):
        """Same file content should produce same hash."""
        # Different paths/node ids, identical bytes — hashing is content-only.
        file1 = create_test_file(temp_dir / "file1.txt", "identical content")
        file2 = create_test_file(temp_dir / "file2.txt", "identical content")
        cache.put("node1", file1, "test")
        cache.put("node2", file2, "test")
        entry1 = cache.get_entry("node1")
        entry2 = cache.get_entry("node2")
        assert entry1.cid == entry2.cid

    def test_different_content_different_hash(self, cache, temp_dir):
        """Different file content should produce different hash."""
        file1 = create_test_file(temp_dir / "file1.txt", "content A")
        file2 = create_test_file(temp_dir / "file2.txt", "content B")
        cache.put("node1", file1, "test")
        cache.put("node2", file2, "test")
        entry1 = cache.get_entry("node1")
        entry2 = cache.get_entry("node2")
        assert entry1.cid != entry2.cid

    def test_find_by_cid(self, cache, temp_dir):
        """Should find entry by content hash."""
        test_file = create_test_file(temp_dir / "input.txt", "unique content")
        cache.put("node1", test_file, "test")
        entry = cache.get_entry("node1")
        found = cache.find_by_cid(entry.cid)
        assert found is not None
        assert found.node_id == "node1"

    def test_cid_persists(self, temp_dir):
        """cid should persist across cache reloads."""
        cache1 = Cache(temp_dir / "cache")
        test_file = create_test_file(temp_dir / "input.txt", "persistent")
        cache1.put("node1", test_file, "test")
        original_hash = cache1.get_entry("node1").cid
        # Create new cache instance (reload from disk)
        cache2 = Cache(temp_dir / "cache")
        entry = cache2.get_entry("node1")
        assert entry.cid == original_hash
class TestActivity:
    """Tests for Activity dataclass."""

    def test_activity_from_dag(self):
        """Activity.from_dag() should classify nodes correctly."""
        # Build a simple DAG: source -> transform -> output
        dag = DAG()
        source = Node(NodeType.SOURCE, {"path": "/test.mp4"})
        transform = Node(NodeType.TRANSFORM, {"effect": "blur"}, inputs=[source.node_id])
        output = Node(NodeType.RESIZE, {"width": 100}, inputs=[transform.node_id])
        dag.add_node(source)
        dag.add_node(transform)
        dag.add_node(output)
        dag.set_output(output.node_id)
        activity = Activity.from_dag(dag)
        # Sources become inputs, the sink becomes the output, everything
        # in between is an intermediate.
        assert source.node_id in activity.input_ids
        assert activity.output_id == output.node_id
        assert transform.node_id in activity.intermediate_ids

    def test_activity_with_multiple_inputs(self):
        """Activity should handle DAGs with multiple source nodes."""
        dag = DAG()
        source1 = Node(NodeType.SOURCE, {"path": "/a.mp4"})
        source2 = Node(NodeType.SOURCE, {"path": "/b.mp4"})
        sequence = Node(NodeType.SEQUENCE, {}, inputs=[source1.node_id, source2.node_id])
        dag.add_node(source1)
        dag.add_node(source2)
        dag.add_node(sequence)
        dag.set_output(sequence.node_id)
        activity = Activity.from_dag(dag)
        assert len(activity.input_ids) == 2
        assert source1.node_id in activity.input_ids
        assert source2.node_id in activity.input_ids
        # The sequence node is both the sink and directly fed by sources,
        # so there are no intermediates.
        assert activity.output_id == sequence.node_id
        assert len(activity.intermediate_ids) == 0

    def test_activity_serialization(self):
        """Activity should serialize and deserialize correctly."""
        dag = DAG()
        source = Node(NodeType.SOURCE, {"path": "/test.mp4"})
        dag.add_node(source)
        dag.set_output(source.node_id)
        activity = Activity.from_dag(dag)
        # Round-trip through the dict representation.
        data = activity.to_dict()
        restored = Activity.from_dict(data)
        assert restored.activity_id == activity.activity_id
        assert restored.input_ids == activity.input_ids
        assert restored.output_id == activity.output_id
        assert restored.intermediate_ids == activity.intermediate_ids

    def test_all_node_ids(self):
        """all_node_ids should return all nodes."""
        activity = Activity(
            activity_id="test",
            input_ids=["a", "b"],
            output_id="c",
            intermediate_ids=["d", "e"],
            created_at=time.time(),
        )
        all_ids = activity.all_node_ids
        # Union of inputs, output, and intermediates — order-independent.
        assert set(all_ids) == {"a", "b", "c", "d", "e"}
class TestActivityStore:
"""Tests for ActivityStore persistence."""
def test_add_and_get(self, activity_store):
"""Should add and retrieve activities."""
activity = Activity(
activity_id="test1",
input_ids=["input1"],
output_id="output1",
intermediate_ids=["inter1"],
created_at=time.time(),
)
activity_store.add(activity)
retrieved = activity_store.get("test1")
assert retrieved is not None
assert retrieved.activity_id == "test1"
def test_persistence(self, temp_dir):
"""Activities should persist across store reloads."""
store1 = ActivityStore(temp_dir / "activities")
activity = Activity(
activity_id="persist",
input_ids=["i1"],
output_id="o1",
intermediate_ids=[],
created_at=time.time(),
)
store1.add(activity)
# Reload
store2 = ActivityStore(temp_dir / "activities")
retrieved = store2.get("persist")
assert retrieved is not None
assert retrieved.activity_id == "persist"
def test_find_by_input_ids(self, activity_store):
"""Should find activities with matching inputs."""
activity1 = Activity(
activity_id="a1",
input_ids=["x", "y"],
output_id="o1",
intermediate_ids=[],
created_at=time.time(),
)
activity2 = Activity(
activity_id="a2",
input_ids=["y", "x"], # Same inputs, different order
output_id="o2",
intermediate_ids=[],
created_at=time.time(),
)
activity3 = Activity(
activity_id="a3",
input_ids=["z"], # Different inputs
output_id="o3",
intermediate_ids=[],
created_at=time.time(),
)
activity_store.add(activity1)
activity_store.add(activity2)
activity_store.add(activity3)
found = activity_store.find_by_input_ids(["x", "y"])
assert len(found) == 2
assert {a.activity_id for a in found} == {"a1", "a2"}
def test_find_using_node(self, activity_store):
"""Should find activities referencing a node."""
activity = Activity(
activity_id="a1",
input_ids=["input1"],
output_id="output1",
intermediate_ids=["inter1"],
created_at=time.time(),
)
activity_store.add(activity)
# Should find by input
found = activity_store.find_using_node("input1")
assert len(found) == 1
# Should find by intermediate
found = activity_store.find_using_node("inter1")
assert len(found) == 1
# Should find by output
found = activity_store.find_using_node("output1")
assert len(found) == 1
# Should not find unknown
found = activity_store.find_using_node("unknown")
assert len(found) == 0
def test_remove(self, activity_store):
    """remove() deletes the record and reports success."""
    doomed = Activity(
        activity_id="to_remove",
        input_ids=["i"],
        output_id="o",
        intermediate_ids=[],
        created_at=time.time(),
    )
    activity_store.add(doomed)
    assert activity_store.get("to_remove") is not None
    assert activity_store.remove("to_remove") is True
    assert activity_store.get("to_remove") is None
class TestActivityManager:
    """Tests for ActivityManager deletion rules."""

    @staticmethod
    def _activity(aid, inputs, output, intermediates=()):
        """Build an Activity record with a fresh timestamp."""
        return Activity(
            activity_id=aid,
            input_ids=list(inputs),
            output_id=output,
            intermediate_ids=list(intermediates),
            created_at=time.time(),
        )

    def test_can_delete_orphaned_entry(self, manager, cache, temp_dir):
        """An entry referenced by no activity is fair game for deletion."""
        cache.put("orphan_node", create_test_file(temp_dir / "orphan.txt", "orphan"), "test")
        assert manager.can_delete_cache_entry("orphan_node") is True

    def test_cannot_delete_shared_entry(self, manager, cache, temp_dir, ap_store):
        """Entries published over ActivityPub are protected from deletion."""
        cache.put("shared_node", create_test_file(temp_dir / "shared.txt", "shared content"), "test")
        ap_store.mark_shared(cache.get_entry("shared_node").cid)
        assert manager.can_delete_cache_entry("shared_node") is False

    def test_cannot_delete_activity_input(self, manager, cache, activity_store, temp_dir):
        """A node serving as an activity input must be kept."""
        cache.put("input_node", create_test_file(temp_dir / "input.txt", "input"), "test")
        activity_store.add(self._activity("a1", ["input_node"], "output_node"))
        assert manager.can_delete_cache_entry("input_node") is False

    def test_cannot_delete_activity_output(self, manager, cache, activity_store, temp_dir):
        """A node serving as an activity output must be kept."""
        cache.put("output_node", create_test_file(temp_dir / "output.txt", "output"), "test")
        activity_store.add(self._activity("a1", ["input_node"], "output_node"))
        assert manager.can_delete_cache_entry("output_node") is False

    def test_can_delete_intermediate(self, manager, cache, activity_store, temp_dir):
        """Intermediates are reconstructible, hence deletable."""
        cache.put("inter_node", create_test_file(temp_dir / "inter.txt", "intermediate"), "test")
        activity_store.add(self._activity("a1", ["input_node"], "output_node", ["inter_node"]))
        assert manager.can_delete_cache_entry("inter_node") is True

    def test_can_discard_activity_no_shared(self, manager, activity_store):
        """With nothing shared, the whole activity may be discarded."""
        activity_store.add(self._activity("a1", ["i1"], "o1", ["m1"]))
        assert manager.can_discard_activity("a1") is True

    def test_cannot_discard_activity_with_shared_output(self, manager, cache, activity_store, temp_dir, ap_store):
        """A shared output pins its activity against discard."""
        cache.put("o1", create_test_file(temp_dir / "output.txt", "output content"), "test")
        activity_store.add(self._activity("a1", ["i1"], "o1"))
        ap_store.mark_shared(cache.get_entry("o1").cid)
        assert manager.can_discard_activity("a1") is False

    def test_cannot_discard_activity_with_shared_input(self, manager, cache, activity_store, temp_dir, ap_store):
        """A shared input pins its activity against discard."""
        cache.put("i1", create_test_file(temp_dir / "input.txt", "input content"), "test")
        activity_store.add(self._activity("a1", ["i1"], "o1"))
        ap_store.mark_shared(cache.get_entry("i1").cid)
        assert manager.can_discard_activity("a1") is False

    def test_discard_activity_deletes_intermediates(self, manager, cache, activity_store, temp_dir):
        """Discarding drops intermediate cache entries and the record itself."""
        cache.put("i1", create_test_file(temp_dir / "input.txt", "input"), "test")
        cache.put("m1", create_test_file(temp_dir / "inter.txt", "intermediate"), "test")
        cache.put("o1", create_test_file(temp_dir / "output.txt", "output"), "test")
        activity_store.add(self._activity("a1", ["i1"], "o1", ["m1"]))
        assert manager.discard_activity("a1") is True
        assert cache.has("m1") is False            # intermediate purged
        assert activity_store.get("a1") is None    # record gone

    def test_discard_activity_deletes_orphaned_output(self, manager, cache, activity_store, temp_dir):
        """An output used nowhere else goes away with its activity."""
        cache.put("o1", create_test_file(temp_dir / "output.txt", "output"), "test")
        activity_store.add(self._activity("a1", [], "o1"))
        manager.discard_activity("a1")
        assert cache.has("o1") is False

    def test_discard_activity_keeps_shared_output(self, manager, cache, activity_store, temp_dir, ap_store):
        """Discard is refused outright when the output is shared."""
        cache.put("o1", create_test_file(temp_dir / "output.txt", "shared output"), "test")
        activity_store.add(self._activity("a1", [], "o1"))
        ap_store.mark_shared(cache.get_entry("o1").cid)
        assert manager.discard_activity("a1") is False
        assert cache.has("o1") is True                  # output preserved
        assert activity_store.get("a1") is not None     # activity preserved

    def test_discard_keeps_input_used_elsewhere(self, manager, cache, activity_store, temp_dir):
        """An input still referenced by another activity survives a discard."""
        cache.put("shared_input", create_test_file(temp_dir / "input.txt", "shared input"), "test")
        activity_store.add(self._activity("a1", ["shared_input"], "o1"))
        activity_store.add(self._activity("a2", ["shared_input"], "o2"))
        manager.discard_activity("a1")
        assert cache.has("shared_input") is True

    def test_get_deletable_entries(self, manager, cache, activity_store, temp_dir):
        """The deletable listing includes orphans and intermediates, never inputs."""
        cache.put("orphan", create_test_file(temp_dir / "orphan.txt", "orphan"), "test")
        cache.put("inter", create_test_file(temp_dir / "inter.txt", "inter"), "test")
        cache.put("input", create_test_file(temp_dir / "input.txt", "input"), "test")
        activity_store.add(self._activity("a1", ["input"], "output", ["inter"]))
        deletable_ids = {e.node_id for e in manager.get_deletable_entries()}
        assert "orphan" in deletable_ids
        assert "inter" in deletable_ids
        assert "input" not in deletable_ids

    def test_cleanup_intermediates(self, manager, cache, activity_store, temp_dir):
        """cleanup_intermediates() removes every intermediate entry."""
        cache.put("inter1", create_test_file(temp_dir / "i1.txt", "inter1"), "test")
        cache.put("inter2", create_test_file(temp_dir / "i2.txt", "inter2"), "test")
        activity_store.add(self._activity("a1", ["input"], "output", ["inter1", "inter2"]))
        assert manager.cleanup_intermediates() == 2
        assert cache.has("inter1") is False
        assert cache.has("inter2") is False
class TestMakeIsSharedFn:
    """Behavior of the make_is_shared_fn factory."""

    def test_returns_true_for_shared(self, ap_store):
        """The produced predicate reports shared hashes as True."""
        predicate = make_is_shared_fn(ap_store)
        ap_store.mark_shared("hash123")
        assert predicate("hash123") is True

    def test_returns_false_for_not_shared(self, ap_store):
        """Hashes never marked shared come back False."""
        predicate = make_is_shared_fn(ap_store)
        assert predicate("unknown_hash") is False

163
core/tests/test_cache.py Normal file
View File

@@ -0,0 +1,163 @@
# core/tests/test_cache.py
"""Tests for primitive cache module."""
import pytest
import tempfile
from pathlib import Path
from artdag.cache import Cache, CacheStats
@pytest.fixture
def cache_dir():
    """Yield a throwaway directory for cache files, cleaned up afterwards."""
    with tempfile.TemporaryDirectory() as tmp:
        yield Path(tmp)
@pytest.fixture
def cache(cache_dir):
    """Fresh Cache rooted in the temporary directory."""
    return Cache(cache_dir)
@pytest.fixture
def sample_file(cache_dir):
    """Write and return a small text file to feed into the cache."""
    path = cache_dir / "sample.txt"
    path.write_text("test content")
    return path
class TestCache:
    """Behavioral tests for the Cache class."""

    def test_cache_creation(self, cache_dir):
        """Constructing a cache materializes its directory on disk."""
        fresh = Cache(cache_dir / "new_cache")
        assert fresh.cache_dir.exists()

    def test_cache_put_and_get(self, cache, sample_file):
        """A stored file is reported present and retrievable."""
        stored = cache.put("abc123", sample_file, "TEST")
        assert stored.exists()
        assert cache.has("abc123")
        assert cache.get("abc123") == stored

    def test_cache_miss(self, cache):
        """Unknown node ids yield None."""
        assert cache.get("nonexistent") is None

    def test_cache_stats_hit_miss(self, cache, sample_file):
        """Hits and misses are tallied; hit_rate reflects both."""
        cache.put("abc123", sample_file, "TEST")
        cache.get("nonexistent")  # miss
        assert cache.stats.misses == 1
        cache.get("abc123")       # hit
        assert cache.stats.hits == 1
        assert cache.stats.hit_rate == 0.5

    def test_cache_remove(self, cache, sample_file):
        """remove() makes an entry disappear."""
        cache.put("abc123", sample_file, "TEST")
        assert cache.has("abc123")
        cache.remove("abc123")
        assert not cache.has("abc123")

    def test_cache_clear(self, cache, sample_file):
        """clear() empties the cache entirely."""
        for node in ("node1", "node2"):
            cache.put(node, sample_file, "TEST")
        assert cache.stats.total_entries == 2
        cache.clear()
        assert cache.stats.total_entries == 0
        assert not cache.has("node1")
        assert not cache.has("node2")

    def test_cache_preserves_extension(self, cache, cache_dir):
        """The cached copy keeps the source file's suffix."""
        video = cache_dir / "video.mp4"
        video.write_text("fake video")
        assert cache.put("video_node", video, "SOURCE").suffix == ".mp4"

    def test_cache_list_entries(self, cache, sample_file):
        """list_entries() enumerates everything stored."""
        cache.put("node1", sample_file, "TYPE1")
        cache.put("node2", sample_file, "TYPE2")
        entries = cache.list_entries()
        assert len(entries) == 2
        ids = {e.node_id for e in entries}
        assert "node1" in ids
        assert "node2" in ids

    def test_cache_persistence(self, cache_dir, sample_file):
        """Entries written by one Cache instance are visible to a new one."""
        Cache(cache_dir).put("abc123", sample_file, "TEST")
        assert Cache(cache_dir).has("abc123")

    def test_cache_prune_by_age(self, cache, sample_file):
        """prune() drops entries older than the cutoff."""
        import time

        cache.put("old_node", sample_file, "TEST")
        # Backdate the entry by an hour so a 30-minute cutoff catches it.
        cache._entries["old_node"].created_at = time.time() - 3600
        assert cache.prune(max_age_seconds=1800) == 1
        assert not cache.has("old_node")

    def test_cache_output_path(self, cache):
        """get_output_path() yields a writable path carrying the suffix."""
        out = cache.get_output_path("abc123", ".mp4")
        assert out.suffix == ".mp4"
        assert "abc123" in str(out)
        assert out.parent.exists()
class TestCacheStats:
    """Unit tests for CacheStats bookkeeping."""

    def test_hit_rate_calculation(self):
        """Two hits and one miss give a hit rate of roughly 2/3."""
        stats = CacheStats()
        for _ in range(2):
            stats.record_hit()
        stats.record_miss()
        assert stats.hits == 2
        assert stats.misses == 1
        assert abs(stats.hit_rate - 0.666) < 0.01

    def test_initial_hit_rate(self):
        """With no traffic the hit rate defaults to zero."""
        assert CacheStats().hit_rate == 0.0

271
core/tests/test_dag.py Normal file
View File

@@ -0,0 +1,271 @@
# core/tests/test_dag.py
"""Tests for primitive DAG data structures."""
import pytest
from artdag.dag import Node, NodeType, DAG, DAGBuilder
class TestNode:
    """Unit tests for Node identity and serialization."""

    def test_node_creation(self):
        """A node stores its type/config and receives an id."""
        n = Node(node_type=NodeType.SOURCE, config={"path": "/test.mp4"})
        assert n.node_type == NodeType.SOURCE
        assert n.config == {"path": "/test.mp4"}
        assert n.node_id is not None

    def test_node_id_is_content_addressed(self):
        """Identical content hashes to an identical node_id."""
        first = Node(node_type=NodeType.SOURCE, config={"path": "/test.mp4"})
        second = Node(node_type=NodeType.SOURCE, config={"path": "/test.mp4"})
        assert first.node_id == second.node_id

    def test_different_config_different_id(self):
        """Config differences change the id."""
        first = Node(node_type=NodeType.SOURCE, config={"path": "/test1.mp4"})
        second = Node(node_type=NodeType.SOURCE, config={"path": "/test2.mp4"})
        assert first.node_id != second.node_id

    def test_node_with_inputs(self):
        """Input ids participate in the content hash."""
        same_a = Node(node_type=NodeType.SEGMENT, config={"duration": 5}, inputs=["abc123"])
        same_b = Node(node_type=NodeType.SEGMENT, config={"duration": 5}, inputs=["abc123"])
        other = Node(node_type=NodeType.SEGMENT, config={"duration": 5}, inputs=["def456"])
        assert same_a.node_id == same_b.node_id
        assert same_a.node_id != other.node_id

    def test_node_serialization(self):
        """to_dict/from_dict round-trips every field and the id."""
        original = Node(
            node_type=NodeType.SEGMENT,
            config={"duration": 5.0, "offset": 10.0},
            inputs=["abc123"],
            name="my_segment",
        )
        restored = Node.from_dict(original.to_dict())
        for attr in ("node_type", "config", "inputs", "name", "node_id"):
            assert getattr(restored, attr) == getattr(original, attr)

    def test_custom_node_type(self):
        """Arbitrary string node types are accepted."""
        n = Node(node_type="CUSTOM_TYPE", config={"custom": True})
        assert n.node_type == "CUSTOM_TYPE"
        assert n.node_id is not None
class TestDAG:
    """Unit tests for the DAG container."""

    def test_dag_creation(self):
        """A new DAG starts empty with no output."""
        dag = DAG()
        assert not dag.nodes
        assert dag.output_id is None

    def test_add_node(self):
        """add_node registers the node under its id."""
        dag = DAG()
        n = Node(node_type=NodeType.SOURCE, config={"path": "/test.mp4"})
        nid = dag.add_node(n)
        assert nid in dag.nodes
        assert dag.nodes[nid] == n

    def test_node_deduplication(self):
        """Adding two content-equal nodes stores a single entry."""
        dag = DAG()
        first = dag.add_node(Node(node_type=NodeType.SOURCE, config={"path": "/test.mp4"}))
        second = dag.add_node(Node(node_type=NodeType.SOURCE, config={"path": "/test.mp4"}))
        assert first == second
        assert len(dag.nodes) == 1

    def test_set_output(self):
        """set_output records the chosen node."""
        dag = DAG()
        nid = dag.add_node(Node(node_type=NodeType.SOURCE, config={"path": "/test.mp4"}))
        dag.set_output(nid)
        assert dag.output_id == nid

    def test_set_output_invalid(self):
        """Pointing the output at an unknown id raises ValueError."""
        with pytest.raises(ValueError):
            DAG().set_output("nonexistent")

    def test_topological_order(self):
        """Dependencies sort before their dependents."""
        dag = DAG()
        src = dag.add_node(Node(node_type=NodeType.SOURCE, config={"path": "/test.mp4"}))
        seg = dag.add_node(Node(node_type=NodeType.SEGMENT, config={"duration": 5}, inputs=[src]))
        dag.set_output(seg)
        order = dag.topological_order()
        assert order.index(src) < order.index(seg)

    def test_validate_valid_dag(self):
        """A well-formed DAG validates with no errors."""
        dag = DAG()
        nid = dag.add_node(Node(node_type=NodeType.SOURCE, config={"path": "/test.mp4"}))
        dag.set_output(nid)
        assert not dag.validate()

    def test_validate_no_output(self):
        """Validation flags a DAG missing an output."""
        dag = DAG()
        dag.add_node(Node(node_type=NodeType.SOURCE, config={"path": "/test.mp4"}))
        errors = dag.validate()
        assert errors
        assert any("output" in msg.lower() for msg in errors)

    def test_validate_missing_input(self):
        """Validation flags references to absent input nodes."""
        dag = DAG()
        nid = dag.add_node(Node(node_type=NodeType.SEGMENT, config={"duration": 5}, inputs=["nonexistent"]))
        dag.set_output(nid)
        errors = dag.validate()
        assert errors
        assert any("missing" in msg.lower() for msg in errors)

    def test_dag_serialization(self):
        """to_dict/from_dict preserves nodes, output, and metadata."""
        dag = DAG(metadata={"name": "test_dag"})
        nid = dag.add_node(Node(node_type=NodeType.SOURCE, config={"path": "/test.mp4"}))
        dag.set_output(nid)
        restored = DAG.from_dict(dag.to_dict())
        assert len(restored.nodes) == len(dag.nodes)
        assert restored.output_id == dag.output_id
        assert restored.metadata == dag.metadata

    def test_dag_json(self):
        """JSON round-trip preserves structure."""
        dag = DAG()
        nid = dag.add_node(Node(node_type=NodeType.SOURCE, config={"path": "/test.mp4"}))
        dag.set_output(nid)
        restored = DAG.from_json(dag.to_json())
        assert len(restored.nodes) == 1
        assert restored.output_id == nid
class TestDAGBuilder:
    """Unit tests for the fluent DAGBuilder API."""

    def test_builder_source(self):
        """source() adds a SOURCE node carrying the path."""
        b = DAGBuilder()
        sid = b.source("/test.mp4")
        assert sid in b.dag.nodes
        src = b.dag.nodes[sid]
        assert src.node_type == NodeType.SOURCE
        assert src.config["path"] == "/test.mp4"

    def test_builder_segment(self):
        """segment() wires duration/offset and links its input."""
        b = DAGBuilder()
        sid = b.source("/test.mp4")
        seg_id = b.segment(sid, duration=5.0, offset=10.0)
        seg = b.dag.nodes[seg_id]
        assert seg.node_type == NodeType.SEGMENT
        assert seg.config["duration"] == 5.0
        assert seg.config["offset"] == 10.0
        assert sid in seg.inputs

    def test_builder_chain(self):
        """A source -> segment -> resize chain builds and validates."""
        b = DAGBuilder()
        tail = b.resize(b.segment(b.source("/test.mp4"), duration=5.0), width=1920, height=1080)
        b.set_output(tail)
        dag = b.build()
        assert len(dag.nodes) == 3
        assert dag.output_id == tail
        assert not dag.validate()

    def test_builder_sequence(self):
        """sequence() joins clips and records both inputs."""
        b = DAGBuilder()
        c1 = b.source("/clip1.mp4")
        c2 = b.source("/clip2.mp4")
        seq_id = b.sequence([c1, c2], transition={"type": "crossfade", "duration": 0.5})
        b.set_output(seq_id)
        seq = b.build().nodes[seq_id]
        assert seq.node_type == NodeType.SEQUENCE
        assert c1 in seq.inputs
        assert c2 in seq.inputs

    def test_builder_mux(self):
        """mux() combines video and audio inputs."""
        b = DAGBuilder()
        vid = b.source("/video.mp4")
        aud = b.source("/audio.mp3")
        mux_id = b.mux(vid, aud)
        b.set_output(mux_id)
        muxed = b.build().nodes[mux_id]
        assert muxed.node_type == NodeType.MUX
        assert vid in muxed.inputs
        assert aud in muxed.inputs

    def test_builder_transform(self):
        """transform() stores the effects dict on the node."""
        b = DAGBuilder()
        t_id = b.transform(b.source("/test.mp4"), effects={"saturation": 1.5, "contrast": 1.2})
        b.set_output(t_id)
        transformed = b.build().nodes[t_id]
        assert transformed.node_type == NodeType.TRANSFORM
        assert transformed.config["effects"]["saturation"] == 1.5

    def test_builder_validation_fails(self):
        """build() rejects a DAG with no output set."""
        b = DAGBuilder()
        b.source("/test.mp4")
        with pytest.raises(ValueError):
            b.build()

464
core/tests/test_engine.py Normal file
View File

@@ -0,0 +1,464 @@
# core/tests/test_engine.py
"""Tests for primitive engine execution."""
import pytest
import subprocess
import tempfile
from pathlib import Path
from artdag.dag import DAG, DAGBuilder, Node, NodeType
from artdag.engine import Engine
from artdag import nodes # Register executors
@pytest.fixture
def cache_dir():
    """Yield a throwaway working directory, cleaned up afterwards."""
    with tempfile.TemporaryDirectory() as tmp:
        yield Path(tmp)
@pytest.fixture
def engine(cache_dir):
    """Engine backed by the temporary cache directory."""
    return Engine(cache_dir)
@pytest.fixture
def test_video(cache_dir):
    """Render a 5-second test pattern with a 440 Hz tone via ffmpeg."""
    video_path = cache_dir / "test_video.mp4"
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-f", "lavfi", "-i", "testsrc=duration=5:size=320x240:rate=30",
            "-f", "lavfi", "-i", "sine=frequency=440:duration=5",
            "-c:v", "libx264", "-preset", "ultrafast",
            "-c:a", "aac",
            str(video_path),
        ],
        capture_output=True,
        check=True,
    )
    return video_path
@pytest.fixture
def test_audio(cache_dir):
    """Render a 5-second 880 Hz MP3 via ffmpeg."""
    audio_path = cache_dir / "test_audio.mp3"
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-f", "lavfi", "-i", "sine=frequency=880:duration=5",
            "-c:a", "libmp3lame",
            str(audio_path),
        ],
        capture_output=True,
        check=True,
    )
    return audio_path
class TestEngineBasic:
    """Smoke tests for engine construction and failure modes."""

    def test_engine_creation(self, cache_dir):
        """Engine wires up a cache on construction."""
        assert Engine(cache_dir).cache is not None

    def test_invalid_dag(self, engine):
        """Executing an empty DAG fails with a validation error."""
        outcome = engine.execute(DAG())
        assert not outcome.success
        assert "Invalid DAG" in outcome.error

    def test_missing_executor(self, engine):
        """A node type without a registered executor fails execution."""
        dag = DAG()
        nid = dag.add_node(Node(node_type="UNKNOWN_TYPE", config={}))
        dag.set_output(nid)
        outcome = engine.execute(dag)
        assert not outcome.success
        assert "No executor" in outcome.error
class TestSourceExecutor:
    """Tests for the SOURCE node executor."""

    def test_source_creates_symlink(self, engine, test_video):
        """A source node materializes as a symlink to the original file."""
        b = DAGBuilder()
        b.set_output(b.source(str(test_video)))
        outcome = engine.execute(b.build())
        assert outcome.success
        assert outcome.output_path.exists()
        assert outcome.output_path.is_symlink()

    def test_source_missing_file(self, engine):
        """A nonexistent source path yields a not-found error."""
        b = DAGBuilder()
        b.set_output(b.source("/nonexistent/file.mp4"))
        outcome = engine.execute(b.build())
        assert not outcome.success
        assert "not found" in outcome.error.lower()
class TestSegmentExecutor:
    """Tests for the SEGMENT node executor."""

    @staticmethod
    def _probed_duration(path):
        """Return the container duration of *path* via ffprobe."""
        probe = subprocess.run(
            ["ffprobe", "-v", "error",
             "-show_entries", "format=duration",
             "-of", "csv=p=0",
             str(path)],
            capture_output=True,
            text=True,
        )
        return float(probe.stdout.strip())

    def test_segment_duration(self, engine, test_video):
        """A 2-second segment probes to roughly 2 seconds."""
        b = DAGBuilder()
        b.set_output(b.segment(b.source(str(test_video)), duration=2.0))
        outcome = engine.execute(b.build())
        assert outcome.success
        assert abs(self._probed_duration(outcome.output_path) - 2.0) < 0.1

    def test_segment_with_offset(self, engine, test_video):
        """Segments starting mid-stream execute successfully."""
        b = DAGBuilder()
        b.set_output(b.segment(b.source(str(test_video)), offset=1.0, duration=2.0))
        assert engine.execute(b.build()).success
class TestResizeExecutor:
    """Tests for the RESIZE node executor."""

    def test_resize_dimensions(self, engine, test_video):
        """Resizing to 640x480 produces a stream with those dimensions."""
        b = DAGBuilder()
        b.set_output(b.resize(b.source(str(test_video)), width=640, height=480, mode="fit"))
        outcome = engine.execute(b.build())
        assert outcome.success
        # Ask ffprobe for the stream geometry of the rendered output.
        probe = subprocess.run(
            ["ffprobe", "-v", "error",
             "-show_entries", "stream=width,height",
             "-of", "csv=p=0:s=x",
             str(outcome.output_path)],
            capture_output=True,
            text=True,
        )
        first_stream = probe.stdout.strip().split("\n")[0]
        assert "640x480" in first_stream
class TestTransformExecutor:
    """Tests for the TRANSFORM node executor."""

    def test_transform_saturation(self, engine, test_video):
        """A single saturation effect renders successfully."""
        b = DAGBuilder()
        b.set_output(b.transform(b.source(str(test_video)), effects={"saturation": 1.5}))
        outcome = engine.execute(b.build())
        assert outcome.success
        assert outcome.output_path.exists()

    def test_transform_multiple_effects(self, engine, test_video):
        """Stacked color effects render successfully."""
        b = DAGBuilder()
        effects = {
            "saturation": 1.2,
            "contrast": 1.1,
            "brightness": 0.05,
        }
        b.set_output(b.transform(b.source(str(test_video)), effects=effects))
        assert engine.execute(b.build()).success
class TestSequenceExecutor:
    """Tests for the SEQUENCE node executor."""

    @staticmethod
    def _probed_duration(path):
        """Return the container duration of *path* via ffprobe."""
        probe = subprocess.run(
            ["ffprobe", "-v", "error",
             "-show_entries", "format=duration",
             "-of", "csv=p=0",
             str(path)],
            capture_output=True,
            text=True,
        )
        return float(probe.stdout.strip())

    def test_sequence_cut(self, engine, test_video):
        """Two 2-second clips joined with a cut run about 4 seconds total."""
        b = DAGBuilder()
        src = b.source(str(test_video))
        first = b.segment(src, duration=2.0)
        second = b.segment(src, offset=2.0, duration=2.0)
        b.set_output(b.sequence([first, second], transition={"type": "cut"}))
        outcome = engine.execute(b.build())
        assert outcome.success
        assert abs(self._probed_duration(outcome.output_path) - 4.0) < 0.2

    def test_sequence_crossfade(self, engine, test_video):
        """Crossfading overlaps the clips: 3 + 3 - 0.5 = 5.5 seconds."""
        b = DAGBuilder()
        src = b.source(str(test_video))
        first = b.segment(src, duration=3.0)
        second = b.segment(src, offset=1.0, duration=3.0)
        b.set_output(b.sequence([first, second], transition={"type": "crossfade", "duration": 0.5}))
        outcome = engine.execute(b.build())
        assert outcome.success
        assert abs(self._probed_duration(outcome.output_path) - 5.5) < 0.3
class TestMuxExecutor:
    """Tests for the MUX node executor."""

    def test_mux_video_audio(self, engine, test_video, test_audio):
        """Muxing a video and an audio source produces an output file."""
        b = DAGBuilder()
        b.set_output(b.mux(b.source(str(test_video)), b.source(str(test_audio))))
        outcome = engine.execute(b.build())
        assert outcome.success
        assert outcome.output_path.exists()
class TestAudioMixExecutor:
    """Tests for the AUDIO_MIX node executor."""

    @staticmethod
    def _sine_mp3(path, frequency, duration):
        """Render a sine-tone MP3 at *path* via ffmpeg and return the path."""
        subprocess.run(
            ["ffmpeg", "-y",
             "-f", "lavfi", "-i", f"sine=frequency={frequency}:duration={duration}",
             "-c:a", "libmp3lame",
             str(path)],
            capture_output=True,
            check=True,
        )
        return path

    def test_audio_mix_simple(self, engine, cache_dir):
        """Two tones mix into a single output."""
        tone_a = self._sine_mp3(cache_dir / "audio1.mp3", 440, 3)
        tone_b = self._sine_mp3(cache_dir / "audio2.mp3", 880, 3)
        b = DAGBuilder()
        b.set_output(b.audio_mix([b.source(str(tone_a)), b.source(str(tone_b))]))
        outcome = engine.execute(b.build())
        assert outcome.success
        assert outcome.output_path.exists()

    def test_audio_mix_with_gains(self, engine, cache_dir):
        """Per-input gains are accepted and render successfully."""
        tone_a = self._sine_mp3(cache_dir / "audio1.mp3", 440, 3)
        tone_b = self._sine_mp3(cache_dir / "audio2.mp3", 880, 3)
        b = DAGBuilder()
        b.set_output(
            b.audio_mix([b.source(str(tone_a)), b.source(str(tone_b))], gains=[1.0, 0.3])
        )
        outcome = engine.execute(b.build())
        assert outcome.success
        assert outcome.output_path.exists()

    def test_audio_mix_three_inputs(self, engine, cache_dir):
        """Three tones with distinct gains mix successfully."""
        tone_paths = [
            self._sine_mp3(cache_dir / f"audio{i}.mp3", freq, 2)
            for i, freq in enumerate([440, 660, 880])
        ]
        b = DAGBuilder()
        sources = [b.source(str(p)) for p in tone_paths]
        b.set_output(b.audio_mix(sources, gains=[1.0, 0.5, 0.3]))
        outcome = engine.execute(b.build())
        assert outcome.success
        assert outcome.output_path.exists()
class TestCaching:
    """Engine-level caching behavior."""

    def test_cache_reuse(self, engine, test_video):
        """A second run of the same DAG is served entirely from cache."""
        b = DAGBuilder()
        b.set_output(b.source(str(test_video)))
        dag = b.build()
        # Cold run: everything executes.
        cold = engine.execute(dag)
        assert cold.success
        assert cold.nodes_cached == 0
        assert cold.nodes_executed == 1
        # Warm run: everything comes from cache.
        warm = engine.execute(dag)
        assert warm.success
        assert warm.nodes_cached == 1
        assert warm.nodes_executed == 0

    def test_clear_cache(self, engine, test_video):
        """clear_cache() empties the engine's cache."""
        b = DAGBuilder()
        b.set_output(b.source(str(test_video)))
        engine.execute(b.build())
        assert engine.cache.stats.total_entries == 1
        engine.clear_cache()
        assert engine.cache.stats.total_entries == 0
class TestProgressCallback:
    """Progress-callback delivery during execution."""

    def test_progress_callback(self, engine, test_video):
        """The callback fires and reports pending/completed transitions."""
        updates = []
        engine.set_progress_callback(lambda p: updates.append((p.node_id, p.status)))
        b = DAGBuilder()
        b.set_output(b.source(str(test_video)))
        assert engine.execute(b.build()).success
        assert updates
        seen = [status for _, status in updates]
        assert "pending" in seen
        assert "completed" in seen
class TestFullWorkflow:
    """End-to-end pipeline: segment -> resize -> transform -> mux."""

    def test_full_pipeline(self, engine, test_video, test_audio):
        """All six nodes execute and produce a final output file."""
        b = DAGBuilder()
        video = b.source(str(test_video))
        audio = b.source(str(test_audio))
        clip = b.segment(video, duration=3.0)
        small = b.resize(clip, width=640, height=480)
        graded = b.transform(small, effects={"saturation": 1.3})
        b.set_output(b.mux(graded, audio))
        outcome = engine.execute(b.build())
        assert outcome.success
        assert outcome.output_path.exists()
        # source x2 + segment + resize + transform + mux
        assert outcome.nodes_executed == 6

110
core/tests/test_executor.py Normal file
View File

@@ -0,0 +1,110 @@
# tests/test_primitive_new/test_executor.py
"""Tests for primitive executor module."""
import pytest
from pathlib import Path
from typing import Any, Dict, List
from artdag.dag import NodeType
from artdag.executor import (
Executor,
register_executor,
get_executor,
list_executors,
clear_executors,
)
class TestExecutorRegistry:
    """Tests for registering and looking up executors in the global registry."""

    def setup_method(self):
        """Start every test with an empty registry."""
        clear_executors()

    def teardown_method(self):
        """Leave the registry empty for the following test."""
        clear_executors()

    def test_register_executor(self):
        """A decorated executor class becomes retrievable by its node type."""

        @register_executor(NodeType.SOURCE)
        class TestSourceExecutor(Executor):
            def execute(self, config, inputs, output_path):
                return output_path

        registered = get_executor(NodeType.SOURCE)
        assert registered is not None
        assert isinstance(registered, TestSourceExecutor)

    def test_register_custom_type(self):
        """String node types (not only NodeType members) are accepted."""

        @register_executor("CUSTOM_NODE")
        class CustomExecutor(Executor):
            def execute(self, config, inputs, output_path):
                return output_path

        assert get_executor("CUSTOM_NODE") is not None

    def test_get_unregistered(self):
        """Looking up a type with no registered executor yields None."""
        assert get_executor(NodeType.ANALYZE) is None

    def test_list_executors(self):
        """list_executors reports every registered node type name."""

        @register_executor(NodeType.SOURCE)
        class SourceExec(Executor):
            def execute(self, config, inputs, output_path):
                return output_path

        @register_executor(NodeType.SEGMENT)
        class SegmentExec(Executor):
            def execute(self, config, inputs, output_path):
                return output_path

        registered_types = list_executors()
        assert "SOURCE" in registered_types
        assert "SEGMENT" in registered_types

    def test_overwrite_warning(self, caplog):
        """Re-registering a node type replaces the earlier executor."""

        @register_executor(NodeType.SOURCE)
        class FirstExecutor(Executor):
            def execute(self, config, inputs, output_path):
                return output_path

        # Registering the same type again should warn and take over the slot.
        @register_executor(NodeType.SOURCE)
        class SecondExecutor(Executor):
            def execute(self, config, inputs, output_path):
                return output_path

        # The most recent registration wins.
        assert isinstance(get_executor(NodeType.SOURCE), SecondExecutor)
class TestExecutorBase:
    """Tests for default behaviour provided by the Executor base class."""

    def test_validate_config_default(self):
        """By default validate_config reports no errors for any config."""

        class NoOpExecutor(Executor):
            def execute(self, config, inputs, output_path):
                return output_path

        assert NoOpExecutor().validate_config({"any": "config"}) == []

    def test_estimate_output_size(self):
        """The default estimate is the sum of the input sizes."""

        class NoOpExecutor(Executor):
            def execute(self, config, inputs, output_path):
                return output_path

        estimated = NoOpExecutor().estimate_output_size({}, [100, 200, 300])
        assert estimated == 600

View File

@@ -0,0 +1,301 @@
"""
Tests for IPFS access consistency.
All IPFS access should use IPFS_API (multiaddr format) for consistency
with art-celery's ipfs_client.py. This ensures Docker deployments work
correctly since IPFS_API is set to /dns/ipfs/tcp/5001.
"""
import os
import re
from pathlib import Path
from typing import Optional
from unittest.mock import patch, MagicMock
import pytest
def multiaddr_to_url(multiaddr: str) -> str:
    """
    Convert an IPFS multiaddr into an HTTP URL.

    This mirrors the canonical conversion used by ipfs_client.py:
    ``/dns[46]/host/tcp/port`` and ``/ip4/addr/tcp/port`` become
    ``http://host:port``; existing http(s) URLs pass through unchanged;
    anything else falls back to the local daemon default.
    """
    # Both the DNS and IPv4 multiaddr forms capture (host, port) the same
    # way, so try each pattern in turn and build the URL from the match.
    for pattern in (r"/dns[46]?/([^/]+)/tcp/(\d+)", r"/ip4/([^/]+)/tcp/(\d+)"):
        match = re.match(pattern, multiaddr)
        if match:
            host, port = match.groups()
            return f"http://{host}:{port}"

    # Already an http/https URL: pass it through untouched.
    if multiaddr.startswith("http"):
        return multiaddr

    # Unrecognised input: fall back to the local IPFS daemon default.
    return "http://127.0.0.1:5001"
class TestMultiaddrConversion:
    """Tests for multiaddr to URL conversion."""

    def test_dns_format(self) -> None:
        """Docker DNS format should convert correctly."""
        assert multiaddr_to_url("/dns/ipfs/tcp/5001") == "http://ipfs:5001"

    def test_dns4_format(self) -> None:
        """dns4 format should work."""
        converted = multiaddr_to_url("/dns4/ipfs.example.com/tcp/5001")
        assert converted == "http://ipfs.example.com:5001"

    def test_ip4_format(self) -> None:
        """IPv4 format should convert correctly."""
        converted = multiaddr_to_url("/ip4/127.0.0.1/tcp/5001")
        assert converted == "http://127.0.0.1:5001"

    def test_already_url(self) -> None:
        """HTTP URLs should pass through."""
        converted = multiaddr_to_url("http://localhost:5001")
        assert converted == "http://localhost:5001"

    def test_fallback(self) -> None:
        """Unknown format should fallback to localhost."""
        assert multiaddr_to_url("garbage") == "http://127.0.0.1:5001"
class TestIPFSConfigConsistency:
    """
    Tests to ensure IPFS configuration is consistent.

    The effect executor should read IPFS_API (like ipfs_client.py)
    rather than a separate IPFS_GATEWAY variable.
    """

    def test_effect_module_should_not_use_gateway_var(self) -> None:
        """
        Regression test: Effect module should use IPFS_API, not IPFS_GATEWAY.

        Bug found 2026-01-12: artdag/nodes/effect.py used IPFS_GATEWAY which
        defaulted to http://127.0.0.1:8080. This doesn't work in Docker where
        the IPFS node is a separate container. The ipfs_client.py uses IPFS_API
        which is correctly set in docker-compose.
        """
        from artdag.nodes import effect

        # After the fix the module should expose IPFS_API (or the helper
        # _get_ipfs_base_url) and drop the old IPFS_GATEWAY variable.
        uses_gateway = hasattr(effect, 'IPFS_GATEWAY')
        uses_api = hasattr(effect, 'IPFS_API') or hasattr(effect, '_get_ipfs_base_url')

        if uses_gateway and not uses_api:
            pytest.fail(
                "Effect module uses IPFS_GATEWAY instead of IPFS_API. "
                "This breaks Docker deployments where IPFS_API=/dns/ipfs/tcp/5001 "
                "but IPFS_GATEWAY defaults to localhost."
            )

    def test_ipfs_api_default_is_localhost(self) -> None:
        """IPFS_API should default to localhost for local development."""
        converted = multiaddr_to_url("/ip4/127.0.0.1/tcp/5001")
        assert "127.0.0.1" in converted
        assert "5001" in converted

    def test_docker_ipfs_api_uses_service_name(self) -> None:
        """In Docker, IPFS_API should use the service name."""
        converted = multiaddr_to_url("/dns/ipfs/tcp/5001")
        assert converted == "http://ipfs:5001"
        assert "127.0.0.1" not in converted
class TestEffectFetchURL:
    """Tests for the URL used to fetch effects from IPFS."""

    def test_fetch_should_use_api_cat_endpoint(self) -> None:
        """
        Effect fetch should use /api/v0/cat endpoint (like ipfs_client.py).

        The IPFS API's cat endpoint works reliably in Docker.
        The gateway endpoint (port 8080) requires separate configuration.
        """
        # The correct way to fetch an effect blob via the IPFS HTTP API.
        api_base = "http://ipfs:5001"
        content_id = "QmTestCid123"
        fetch_url = f"{api_base}/api/v0/cat?arg={content_id}"

        assert "/api/v0/cat" in fetch_url
        assert "arg=" in fetch_url

    def test_gateway_url_is_different_from_api(self) -> None:
        """
        Document the difference between gateway and API URLs.

        Gateway: http://ipfs:8080/ipfs/{cid} (requires IPFS_GATEWAY config)
        API: http://ipfs:5001/api/v0/cat?arg={cid} (uses IPFS_API config)

        Using the API is more reliable since IPFS_API is already configured
        correctly in docker-compose.yml.
        """
        content_id = "QmTestCid123"
        # Gateway style (the old broken way) vs API style (the correct way).
        gateway_style = f"http://ipfs:8080/ipfs/{content_id}"
        api_style = f"http://ipfs:5001/api/v0/cat?arg={content_id}"

        assert gateway_style != api_style
        assert ":8080" in gateway_style
        assert ":5001" in api_style
class TestEffectDependencies:
    """Tests for effect dependency handling."""

    def test_parse_pep723_dependencies(self) -> None:
        """Should parse PEP 723 dependencies from effect source."""
        # Effect source with a PEP 723 inline-metadata block declaring deps.
        source = '''
# /// script
# requires-python = ">=3.10"
# dependencies = ["numpy", "opencv-python"]
# ///
"""
@effect test_effect
"""
def process_frame(frame, params, state):
    return frame, state
'''
        # Import the function after the fix is applied
        from artdag.nodes.effect import _parse_pep723_dependencies
        deps = _parse_pep723_dependencies(source)
        assert deps == ["numpy", "opencv-python"]

    def test_parse_pep723_no_dependencies(self) -> None:
        """Should return empty list if no dependencies block."""
        # Effect source with no PEP 723 metadata block at all.
        source = '''
"""
@effect simple_effect
"""
def process_frame(frame, params, state):
    return frame, state
'''
        from artdag.nodes.effect import _parse_pep723_dependencies
        deps = _parse_pep723_dependencies(source)
        assert deps == []

    def test_ensure_dependencies_already_installed(self) -> None:
        """Should return True if dependencies are already installed."""
        from artdag.nodes.effect import _ensure_dependencies
        # os is always available
        result = _ensure_dependencies(["os"], "QmTest123")
        assert result is True

    def test_effect_with_missing_dependency_gives_clear_error(self, tmp_path: Path) -> None:
        """
        Regression test: Missing dependencies should give clear error message.

        Bug found 2026-01-12: Effect with numpy dependency failed with
        "No module named 'numpy'" but this was swallowed and reported as
        "Unknown effect: invert" - very confusing.
        """
        effects_dir = tmp_path / "_effects"
        effect_cid = "QmTestEffectWithDeps"
        # Create effect that imports a non-existent module
        effect_dir = effects_dir / effect_cid
        effect_dir.mkdir(parents=True)
        (effect_dir / "effect.py").write_text('''
# /// script
# requires-python = ">=3.10"
# dependencies = ["some_nonexistent_package_xyz"]
# ///
"""
@effect test_effect
"""
import some_nonexistent_package_xyz
def process_frame(frame, params, state):
    return frame, state
''')
        # The effect file exists
        effect_path = effects_dir / effect_cid / "effect.py"
        assert effect_path.exists()
        # When loading fails due to missing import, error should mention the dependency
        with patch.dict(os.environ, {"CACHE_DIR": str(tmp_path)}):
            from artdag.nodes.effect import _load_cached_effect
            # This should return None but log a clear error about the missing module
            result = _load_cached_effect(effect_cid)
            # Currently returns None, which causes "Unknown effect" error
            # The real issue is the dependency isn't installed
            assert result is None
class TestEffectCacheAndFetch:
"""Integration tests for effect caching and fetching."""
def test_effect_loads_from_cache_without_ipfs(self, tmp_path: Path) -> None:
    """When effect is in cache, IPFS should not be contacted."""
    effects_dir = tmp_path / "_effects"
    effect_cid = "QmTestEffect123"
    # Create cached effect on disk so no network fetch should be needed.
    effect_dir = effects_dir / effect_cid
    effect_dir.mkdir(parents=True)
    (effect_dir / "effect.py").write_text('''
def process_frame(frame, params, state):
    return frame, state
''')
    # Patch environment and verify effect can be loaded
    with patch.dict(os.environ, {"CACHE_DIR": str(tmp_path)}):
        from artdag.nodes.effect import _load_cached_effect
        # Should load without hitting IPFS
        effect_fn = _load_cached_effect(effect_cid)
        assert effect_fn is not None
def test_effect_fetch_uses_correct_endpoint(self, tmp_path: Path) -> None:
"""When fetching from IPFS, should use API endpoint."""
effects_dir = tmp_path / "_effects"
effects_dir.mkdir(parents=True)
effect_cid = "QmNonExistentEffect"
with patch.dict(os.environ, {
"CACHE_DIR": str(tmp_path),
"IPFS_API": "/dns/ipfs/tcp/5001"
}):
with patch('requests.post') as mock_post:
# Set up mock to return effect source
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.content = b'def process_frame(f, p, s): return f, s'
mock_post.return_value = mock_response
from artdag.nodes.effect import _load_cached_effect
# Try to load - should attempt IPFS fetch
_load_cached_effect(effect_cid)
# After fix, this should use the API endpoint
# Check if requests.post was called (API style)
# or requests.get was called (gateway style)
# The fix should make it use POST to /api/v0/cat