Add testing infrastructure and refactor DAG transformation

Testing setup:
- Add pyproject.toml with mypy and pytest configuration
- Add requirements-dev.txt for development dependencies
- Create tests/ directory with test fixtures
- Add 17 unit tests for DAG transformation pipeline

Type annotations:
- Add app/types.py with TypedDict definitions for node configs
- Add typed helper functions: transform_node, build_input_name_mapping,
  bind_inputs, prepare_dag_for_execution
- Refactor run_recipe to use the new typed helpers

Regression tests for today's bugs:
- test_effect_cid_key_not_effect_hash: Verifies CID uses 'cid' key
- test_source_cid_binding_persists: Verifies bound CIDs in final DAG

Run tests with: pytest tests/ -v

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-12 09:37:06 +00:00
parent 0ba1d6e82d
commit 56009c391d
7 changed files with 996 additions and 110 deletions
--- a/app/routers/recipes.py
+++ b/app/routers/recipes.py
@@ -4,8 +4,9 @@ Recipe management routes for L1 server.
 Handles recipe upload, listing, viewing, and execution.
 """

+import json
 import logging
-from typing import List, Optional
+from typing import Any, Dict, List, Optional, Tuple

 from fastapi import APIRouter, Request, Depends, HTTPException, UploadFile, File
 from fastapi.responses import HTMLResponse
@@ -18,6 +19,10 @@ from artdag_common.middleware.auth import UserContext
 from ..dependencies import require_auth, get_templates, get_redis_client, get_cache_manager
 from ..services.auth_service import AuthService
 from ..services.recipe_service import RecipeService
+from ..types import (
+    CompiledNode, TransformedNode, Registry, Recipe,
+    is_variable_input, get_effect_cid,
+)

 router = APIRouter()
 logger = logging.getLogger(__name__)
@@ -30,14 +35,184 @@ class RecipeUploadRequest(BaseModel):


 class RecipeRunRequest(BaseModel):
-    inputs: dict = {}
+    """Request to run a recipe with variable inputs."""
+    inputs: Dict[str, str] = {}  # Map input names to CIDs


-def get_recipe_service():
+def get_recipe_service() -> RecipeService:
    """Get recipe service instance."""
    return RecipeService(get_redis_client(), get_cache_manager())


+def transform_node(
+    node: CompiledNode,
+    assets: Dict[str, Dict[str, Any]],
+    effects: Dict[str, Dict[str, Any]],
+) -> TransformedNode:
+    """
+    Convert one compiled node into the shape used for artdag execution.
+
+    - 'id' becomes 'node_id' (empty string when missing)
+    - 'type' becomes 'node_type' ("EFFECT" when missing)
+    - a SOURCE node whose config names a registered asset, or an EFFECT node
+      whose config names a registered effect, gets that entry's CID stored
+      under config["cid"]
+
+    The node's config is shallow-copied, so the input node is not modified.
+    """
+    raw_type = node.get("type")
+    resolved_config = dict(node.get("config", {}))
+
+    # (node type, config key holding the reference, registry section to search)
+    reference_rules = (
+        ("SOURCE", "asset", assets),
+        ("EFFECT", "effect", effects),
+    )
+    for expected_type, ref_key, section in reference_rules:
+        if raw_type == expected_type and ref_key in resolved_config:
+            ref_name = resolved_config[ref_key]
+            if ref_name in section:
+                resolved_config["cid"] = section[ref_name].get("cid")
+
+    transformed = {
+        "node_id": node.get("id", ""),
+        "node_type": node.get("type", "EFFECT"),
+        "config": resolved_config,
+        "inputs": node.get("inputs", []),
+        "name": node.get("name"),
+    }
+    return transformed
+
+
+def build_input_name_mapping(
+    nodes: Dict[str, TransformedNode],
+) -> Dict[str, str]:
+    """
+    Collect every alias under which a variable-input SOURCE node can be bound.
+
+    Only SOURCE nodes whose config passes is_variable_input() contribute.
+    For each such node the following keys map to its node ID:
+    - the node ID itself
+    - config.name as written (e.g., "Second Video")
+    - config.name lower-cased with spaces as underscores ("second_video")
+    - config.name lower-cased with spaces as hyphens ("second-video")
+    - node.name (def binding name), plus the same with "-" replaced by "_"
+
+    When two nodes produce the same alias, the node visited later wins.
+    """
+    aliases_to_node: Dict[str, str] = {}
+
+    for source_id, candidate in nodes.items():
+        if candidate.get("node_type") != "SOURCE" or not is_variable_input(
+            candidate.get("config", {})
+        ):
+            continue
+
+        aliases = [source_id]
+
+        display_name = candidate.get("config", {}).get("name")
+        if display_name:
+            lowered = display_name.lower()
+            aliases += [
+                display_name,
+                lowered.replace(" ", "_"),
+                lowered.replace(" ", "-"),
+            ]
+
+        binding_name = candidate.get("name")
+        if binding_name:
+            aliases += [binding_name, binding_name.replace("-", "_")]
+
+        for alias in aliases:
+            aliases_to_node[alias] = source_id
+
+    return aliases_to_node
+
+
+def bind_inputs(
+    nodes: Dict[str, TransformedNode],
+    input_name_to_node: Dict[str, str],
+    user_inputs: Dict[str, str],
+) -> List[str]:
+    """
+    Bind user-provided input CIDs to source nodes (mutates ``nodes`` in place).
+
+    Each input name is resolved in two steps:
+    1. as a node ID, accepted only when that node is a SOURCE node;
+    2. as an alias in ``input_name_to_node``.
+
+    A node without a "config" dict gets one created before the CID is stored:
+    nodes that arrive already keyed by ID are not passed through
+    transform_node, so the key is not guaranteed to exist.
+
+    Returns list of warnings for inputs that couldn't be bound.
+    """
+    warnings: List[str] = []
+
+    for input_name, cid in user_inputs.items():
+        # Try direct node ID match first
+        if input_name in nodes:
+            node = nodes[input_name]
+            if node.get("node_type") == "SOURCE":
+                node.setdefault("config", {})["cid"] = cid
+                logger.info(f"Bound input {input_name} directly to node, cid={cid[:16]}...")
+                continue
+
+        # Try input name lookup
+        if input_name in input_name_to_node:
+            node_id = input_name_to_node[input_name]
+            node = nodes[node_id]
+            node.setdefault("config", {})["cid"] = cid
+            logger.info(f"Bound input {input_name} via lookup to node {node_id}, cid={cid[:16]}...")
+            continue
+
+        # Input not found
+        warnings.append(f"Input '{input_name}' not found in recipe")
+        logger.warning(f"Input {input_name} not found in nodes or input_name_to_node")
+
+    return warnings
+
+
+def prepare_dag_for_execution(
+    recipe: Recipe,
+    user_inputs: Dict[str, str],
+) -> Tuple[str, List[str]]:
+    """
+    Build the executable DAG JSON for a recipe run.
+
+    Works on a copy of recipe["dag"]: list-form nodes are transformed and
+    keyed by ID, user inputs are bound to SOURCE nodes, "output" is renamed
+    to "output_id", and an empty "metadata" dict is added when missing.
+
+    Raises ValueError when the recipe has no dict-valued "dag".
+
+    Returns (dag_json, warnings).
+    """
+    source_dag = recipe.get("dag")
+    if not (source_dag and isinstance(source_dag, dict)):
+        raise ValueError("Recipe has no DAG definition")
+
+    # JSON round-trip gives a deep copy, so the stored recipe is never mutated
+    working_dag = json.loads(json.dumps(source_dag))
+    dag_nodes = working_dag.get("nodes", {})
+
+    # Registry sections used to resolve asset/effect references
+    registry = recipe.get("registry") or {}
+    assets = registry.get("assets", {})
+    effects = registry.get("effects", {})
+
+    # Compiler output is a list of nodes; execution wants them keyed by ID.
+    # Nodes without an ID are dropped.
+    if isinstance(dag_nodes, list):
+        keyed_nodes: Dict[str, TransformedNode] = {
+            raw_node["id"]: transform_node(raw_node, assets, effects)
+            for raw_node in dag_nodes
+            if raw_node.get("id")
+        }
+        dag_nodes = keyed_nodes
+        working_dag["nodes"] = dag_nodes
+
+    # Resolve aliases, then attach the user-supplied CIDs
+    input_name_to_node = build_input_name_mapping(dag_nodes)
+    logger.info(f"Input name to node mapping: {input_name_to_node}")
+    logger.info(f"User-provided inputs: {user_inputs}")
+
+    warnings = bind_inputs(dag_nodes, input_name_to_node, user_inputs)
+
+    # Debug aid: show what every SOURCE node ended up with
+    for nid, n in dag_nodes.items():
+        if n.get("node_type") == "SOURCE":
+            logger.info(f"Final SOURCE node {nid}: config={n.get('config')}")
+
+    # Rename output -> output_id
+    if "output" in working_dag:
+        working_dag["output_id"] = working_dag.pop("output")
+
+    # Guarantee a metadata dict
+    working_dag.setdefault("metadata", {})
+
+    return json.dumps(working_dag), warnings
+
+
@router.post("/upload")
 async def upload_recipe(
    file: UploadFile = File(...),
@@ -320,117 +495,15 @@ async def run_recipe(
        raise HTTPException(404, "Recipe not found")

    try:
-        import json
-
        # Create run using run service
        run_service = RunService(database, get_redis_client(), get_cache_manager())

-        # If recipe has a DAG definition, bind inputs and convert to JSON
-        recipe_dag = recipe.get("dag")
+        # Prepare DAG for execution (transform nodes, bind inputs)
        dag_json = None
-        if recipe_dag and isinstance(recipe_dag, dict):
-            # Bind inputs to the DAG's source nodes
-            dag_copy = json.loads(json.dumps(recipe_dag))  # Deep copy
-            nodes = dag_copy.get("nodes", {})
-
-            # Get registry for resolving asset/effect references
-            registry = recipe.get("registry", {})
-            assets = registry.get("assets", {})
-            effects = registry.get("effects", {})
-
-            # Convert nodes from list to dict if needed, and transform to artdag format
-            if isinstance(nodes, list):
-                nodes_dict = {}
-                for node in nodes:
-                    node_id = node.get("id")
-                    if node_id:
-                        config = node.get("config", {})
-
-                        # Resolve asset references for SOURCE nodes
-                        if node.get("type") == "SOURCE" and "asset" in config:
-                            asset_name = config["asset"]
-                            if asset_name in assets:
-                                config["cid"] = assets[asset_name].get("cid")
-
-                        # Resolve effect references for EFFECT nodes
-                        if node.get("type") == "EFFECT" and "effect" in config:
-                            effect_name = config["effect"]
-                            if effect_name in effects:
-                                # Use "cid" - the executor looks for this field
-                                config["cid"] = effects[effect_name].get("cid")
-
-                        # Transform to artdag format: type->node_type, id->node_id
-                        transformed = {
-                            "node_id": node_id,
-                            "node_type": node.get("type", "EFFECT"),
-                            "config": config,
-                            "inputs": node.get("inputs", []),
-                            "name": node.get("name"),
-                        }
-                        nodes_dict[node_id] = transformed
-                nodes = nodes_dict
-                dag_copy["nodes"] = nodes
-
-            # Build lookup for variable inputs: map input names to node IDs
-            # Variable inputs can be referenced by: node_id, config.name, config.input (if string)
-            input_name_to_node = {}
-            for node_id, node in nodes.items():
-                logger.debug(f"Checking node {node_id}: type={node.get('node_type')}, config={node.get('config')}")
-                if node.get("node_type") == "SOURCE":
-                    config = node.get("config", {})
-                    # Only variable inputs (those with 'input' in config, not fixed assets)
-                    if config.get("input"):
-                        input_name_to_node[node_id] = node_id
-                        # Map by config.name (e.g., "Second Video")
-                        if config.get("name"):
-                            name = config["name"]
-                            input_name_to_node[name] = node_id
-                            # Also allow snake_case version
-                            input_name_to_node[name.lower().replace(" ", "_")] = node_id
-                            input_name_to_node[name.lower().replace(" ", "-")] = node_id
-                        # Map by node.name if available (def binding)
-                        if node.get("name"):
-                            input_name_to_node[node["name"]] = node_id
-                            input_name_to_node[node["name"].replace("-", "_")] = node_id
-
-            logger.info(f"Input name to node mapping: {input_name_to_node}")
-            logger.info(f"User-provided inputs: {req.inputs}")
-
-            # Map user-provided input names to content hashes (for variable inputs)
-            for input_name, cid in req.inputs.items():
-                # Try direct node ID match first
-                if input_name in nodes:
-                    node = nodes[input_name]
-                    if node.get("node_type") == "SOURCE":
-                        if "config" not in node:
-                            node["config"] = {}
-                        node["config"]["cid"] = cid
-                        logger.info(f"Bound input {input_name} directly to node, cid={cid[:16]}...")
-                # Try input name lookup
-                elif input_name in input_name_to_node:
-                    node_id = input_name_to_node[input_name]
-                    node = nodes[node_id]
-                    if "config" not in node:
-                        node["config"] = {}
-                    node["config"]["cid"] = cid
-                    logger.info(f"Bound input {input_name} via lookup to node {node_id}, cid={cid[:16]}...")
-                else:
-                    logger.warning(f"Input {input_name} not found in nodes or input_name_to_node")
-
-            # Log final DAG nodes for debugging
-            for nid, n in nodes.items():
-                if n.get("node_type") == "SOURCE":
-                    logger.info(f"Final SOURCE node {nid}: config={n.get('config')}")
-
-            # Transform output to output_id
-            if "output" in dag_copy:
-                dag_copy["output_id"] = dag_copy.pop("output")
-
-            # Add metadata if not present
-            if "metadata" not in dag_copy:
-                dag_copy["metadata"] = {}
-
-            dag_json = json.dumps(dag_copy)
+        if recipe.get("dag"):
+            dag_json, warnings = prepare_dag_for_execution(recipe, req.inputs)
+            for warning in warnings:
+                logger.warning(warning)

        run, error = await run_service.create_run(
            recipe=recipe_id,  # Use recipe hash as primary identifier
--- a/app/types.py
+++ b/app/types.py
@@ -0,0 +1,160 @@
+"""
+Type definitions for Art DAG L1 server.
+
+Uses TypedDict for configuration structures to enable mypy checking.
+"""
+
+from typing import Any, Dict, List, Optional, TypedDict, Union
+from typing_extensions import NotRequired
+
+
+# === Node Config Types ===
+
+class SourceConfig(TypedDict, total=False):
+    """Config for SOURCE nodes.
+
+    Every key is optional (``total=False``).
+    """
+    cid: str  # Content ID (IPFS CID or SHA3-256 hash)
+    asset: str  # Asset name from registry
+    input: bool  # True if this is a variable input (see is_variable_input)
+    name: str  # Human-readable name for variable inputs
+    description: str  # Description for variable inputs
+
+
+class EffectConfig(TypedDict, total=False):
+    """Config for EFFECT nodes.
+
+    Every key is optional (``total=False``).
+    """
+    effect: str  # Effect name
+    cid: str  # Effect CID (for cached/IPFS effects)
+    # Effect parameters are additional keys; only the two below are declared,
+    # so any other parameter name is unknown to the type checker.
+    intensity: float
+    level: float
+
+
+class SequenceConfig(TypedDict, total=False):
+    """Config for SEQUENCE nodes."""
+    transition: Dict[str, Any]  # Transition config (its schema is not declared here)
+
+
+class SegmentConfig(TypedDict, total=False):
+    """Config for SEGMENT nodes."""
+    # NOTE(review): units (seconds vs. frames) and whether `end` and
+    # `duration` may both be given are not stated here — confirm.
+    start: float
+    end: float
+    duration: float
+
+
+# Union of all config types.
+# The Dict[str, Any] member means any string-keyed dict satisfies this alias.
+NodeConfig = Union[SourceConfig, EffectConfig, SequenceConfig, SegmentConfig, Dict[str, Any]]
+
+
+# === Node Types ===
+
+class CompiledNode(TypedDict):
+    """Node as produced by the S-expression compiler."""
+    id: str
+    type: str  # "SOURCE", "EFFECT", "SEQUENCE", etc.
+    config: Dict[str, Any]
+    inputs: List[str]
+    name: NotRequired[str]  # The only key that may be omitted
+
+
+class TransformedNode(TypedDict):
+    """Node after transformation for artdag execution."""
+    node_id: str
+    node_type: str
+    config: Dict[str, Any]
+    inputs: List[str]
+    # transform_node fills this from node.get("name"), so the key is present
+    # with value None whenever the compiled node carried no name.
+    name: NotRequired[Optional[str]]
+
+
+# === DAG Types ===
+
+class CompiledDAG(TypedDict):
+    """DAG as produced by the S-expression compiler."""
+    nodes: List[CompiledNode]  # List form; TransformedDAG keys its nodes by ID instead
+    output: str  # NOTE(review): presumably the ID of the final node — confirm
+
+
+class TransformedDAG(TypedDict):
+    """DAG after transformation for artdag execution."""
+    nodes: Dict[str, TransformedNode]  # Keyed by node ID
+    output_id: str  # Counterpart of CompiledDAG.output
+    metadata: NotRequired[Dict[str, Any]]  # The only key that may be omitted
+
+
+# === Registry Types ===
+
+class AssetEntry(TypedDict, total=False):
+    """Asset in the recipe registry.
+
+    Every key is optional (``total=False``).
+    """
+    cid: str  # Content ID of the asset
+    url: str  # NOTE(review): presumably where the asset can be fetched — confirm
+
+
+class EffectEntry(TypedDict, total=False):
+    """Effect in the recipe registry.
+
+    Every key is optional (``total=False``).
+    """
+    cid: str  # Content ID of the effect
+    url: str  # NOTE(review): presumably where the effect can be fetched — confirm
+    temporal: bool  # NOTE(review): meaning not visible here — confirm with the executor
+
+
+class Registry(TypedDict):
+    """Recipe registry containing assets and effects.
+
+    Both keys are required by this declaration.
+    """
+    assets: Dict[str, AssetEntry]  # Keyed by asset name
+    effects: Dict[str, EffectEntry]  # Keyed by effect name
+
+
+# === Recipe Types ===
+
+class Recipe(TypedDict, total=False):
+    """Compiled recipe structure.
+
+    Every key is optional (``total=False``), so callers must not assume that
+    ``dag`` or ``registry`` is present.
+    """
+    name: str
+    version: str
+    description: str
+    owner: str
+    registry: Registry  # Named assets/effects that node configs refer to
+    dag: CompiledDAG  # Compiler output (nodes as a list, "output" key)
+    recipe_id: str
+    ipfs_cid: str
+    sexp: str  # NOTE(review): presumably the S-expression source text — confirm
+    step_count: int
+    error: str  # NOTE(review): presumably set when compilation failed — confirm
+
+
+# === API Request/Response Types ===
+
+# Mapping of input names to CIDs for recipe execution.
+# A TypedDict with no fields accepts no keys at all under a type checker, so
+# the intended "just Dict[str, str]" is spelled as a plain alias.
+RecipeRunInputs = Dict[str, str]
+
+
+class RunResult(TypedDict, total=False):
+    """Result of a recipe run.
+
+    Every key is optional (``total=False``).
+    """
+    run_id: str
+    status: str  # "pending", "running", "completed", "failed"
+    recipe: str  # NOTE(review): presumably the recipe ID/hash — confirm
+    inputs: List[str]  # NOTE(review): presumably the bound input CIDs — confirm
+    output_cid: str
+    ipfs_cid: str
+    error: str
+    created_at: str  # NOTE(review): timestamp format not stated here — confirm
+    completed_at: str
+
+
+# === Helper functions for type narrowing ===
+
+def is_source_node(node: TransformedNode) -> bool:
+    """Return True when the node's ``node_type`` equals ``"SOURCE"``."""
+    kind = node.get("node_type")
+    return kind == "SOURCE"
+
+
+def is_effect_node(node: TransformedNode) -> bool:
+    """Return True when the node's ``node_type`` equals ``"EFFECT"``."""
+    kind = node.get("node_type")
+    return kind == "EFFECT"
+
+
+def is_variable_input(config: Dict[str, Any]) -> bool:
+    """Return True when ``config["input"]`` is present and truthy."""
+    return True if config.get("input") else False
+
+
+def get_effect_cid(config: Dict[str, Any]) -> Optional[str]:
+    """Return the effect CID: ``config["cid"]`` if truthy, else ``config.get("hash")``."""
+    primary = config.get("cid")
+    if primary:
+        return primary
+    return config.get("hash")