Add testing infrastructure and refactor DAG transformation

Testing setup:
- Add pyproject.toml with mypy and pytest configuration
- Add requirements-dev.txt for development dependencies
- Create tests/ directory with test fixtures
- Add 17 unit tests for DAG transformation pipeline

Type annotations:
- Add app/types.py with TypedDict definitions for node configs
- Add typed helper functions: transform_node, build_input_name_mapping,
  bind_inputs, prepare_dag_for_execution
- Refactor run_recipe to use the new typed helpers

Regression tests for today's bugs:
- test_effect_cid_key_not_effect_hash: Verifies CID uses 'cid' key
- test_source_cid_binding_persists: Verifies bound CIDs in final DAG

Run tests with: pytest tests/ -v

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-12 09:37:06 +00:00
parent 0ba1d6e82d
commit 56009c391d
7 changed files with 996 additions and 110 deletions

View File

@@ -4,8 +4,9 @@ Recipe management routes for L1 server.
Handles recipe upload, listing, viewing, and execution.
"""
import json
import logging
from typing import List, Optional
from typing import Any, Dict, List, Optional, Tuple
from fastapi import APIRouter, Request, Depends, HTTPException, UploadFile, File
from fastapi.responses import HTMLResponse
@@ -18,6 +19,10 @@ from artdag_common.middleware.auth import UserContext
from ..dependencies import require_auth, get_templates, get_redis_client, get_cache_manager
from ..services.auth_service import AuthService
from ..services.recipe_service import RecipeService
from ..types import (
CompiledNode, TransformedNode, Registry, Recipe,
is_variable_input, get_effect_cid,
)
router = APIRouter()
logger = logging.getLogger(__name__)
@@ -30,14 +35,184 @@ class RecipeUploadRequest(BaseModel):
class RecipeRunRequest(BaseModel):
    """Request to run a recipe with variable inputs."""

    # Single declaration of the field: the earlier untyped `inputs: dict = {}`
    # line preceded the docstring and was superseded by this typed version.
    inputs: Dict[str, str] = {}  # Map input names to CIDs
def get_recipe_service() -> RecipeService:
    """Build a RecipeService from the Redis client and cache manager dependencies."""
    return RecipeService(get_redis_client(), get_cache_manager())
def transform_node(
    node: CompiledNode,
    assets: Dict[str, Dict[str, Any]],
    effects: Dict[str, Dict[str, Any]],
) -> TransformedNode:
    """
    Transform a compiled node to artdag execution format.

    - Resolves asset references to CIDs for SOURCE nodes
    - Resolves effect references to CIDs for EFFECT nodes
    - Renames 'type' to 'node_type', 'id' to 'node_id'

    The input node is not mutated: its ``config`` dict and ``inputs`` list
    are copied (one level) before anything is written.

    Args:
        node: Compiled node; missing keys fall back to defaults
            (``id`` -> ``""``, ``type`` -> ``"EFFECT"``, ``config`` -> ``{}``,
            ``inputs`` -> ``[]``).
        assets: Registry assets keyed by asset name; the entry's ``"cid"``
            is copied into the config of a SOURCE node that names it.
        effects: Registry effects keyed by effect name; the entry's ``"cid"``
            is copied into the config of an EFFECT node that names it.

    Returns:
        A dict with ``node_id``, ``node_type``, ``config``, ``inputs`` and
        ``name`` (``None`` when the compiled node has no name).
    """
    # Compute the effective type once so the reference resolution below and
    # the emitted "node_type" can never disagree (a node without "type" is
    # emitted as EFFECT and must therefore have its effect resolved too).
    node_type = node.get("type", "EFFECT")

    # `or` also covers an explicit null/None value, not just a missing key.
    config = dict(node.get("config") or {})
    inputs = list(node.get("inputs") or [])

    # Resolve asset references for SOURCE nodes
    if node_type == "SOURCE" and "asset" in config:
        asset_name = config["asset"]
        if asset_name in assets:
            config["cid"] = assets[asset_name].get("cid")

    # Resolve effect references for EFFECT nodes
    if node_type == "EFFECT" and "effect" in config:
        effect_name = config["effect"]
        if effect_name in effects:
            # The key must be "cid" (not an effect-hash key).
            config["cid"] = effects[effect_name].get("cid")

    return {
        "node_id": node.get("id", ""),
        "node_type": node_type,
        "config": config,
        "inputs": inputs,
        "name": node.get("name"),
    }
def build_input_name_mapping(
    nodes: Dict[str, TransformedNode],
) -> Dict[str, str]:
    """
    Map every accepted alias of a variable input to its node ID.

    Only SOURCE nodes whose config passes ``is_variable_input`` contribute.
    For each such node the aliases are, in this order:
    - the node_id itself
    - config.name verbatim (e.g., "Second Video")
    - config.name lower-cased with spaces as underscores ("second_video")
    - config.name lower-cased with spaces as hyphens ("second-video")
    - node.name (def binding name), verbatim and with "-" replaced by "_"

    When two nodes share an alias, the node visited later wins.
    """
    alias_to_node: Dict[str, str] = {}

    for node_id, node in nodes.items():
        is_variable_source = (
            node.get("node_type") == "SOURCE"
            and is_variable_input(node.get("config", {}))
        )
        if not is_variable_source:
            continue

        aliases = [node_id]

        display_name = node.get("config", {}).get("name")
        if display_name:
            lowered = display_name.lower()
            aliases += [
                display_name,
                lowered.replace(" ", "_"),
                lowered.replace(" ", "-"),
            ]

        binding_name = node.get("name")
        if binding_name:
            aliases += [binding_name, binding_name.replace("-", "_")]

        # fromkeys keeps first-seen order, matching one assignment per alias.
        alias_to_node.update(dict.fromkeys(aliases, node_id))

    return alias_to_node
def _set_node_cid(node: Dict[str, Any], cid: str) -> None:
    """Write ``cid`` into ``node["config"]``, creating the config dict if absent."""
    config = node.get("config")
    if config is None:
        # Nodes that arrive already keyed by ID are not passed through
        # transform_node, so they are not guaranteed to carry a config dict.
        config = node["config"] = {}
    config["cid"] = cid


def bind_inputs(
    nodes: Dict[str, TransformedNode],
    input_name_to_node: Dict[str, str],
    user_inputs: Dict[str, str],
) -> List[str]:
    """
    Bind user-provided input CIDs to source nodes (mutates ``nodes`` in place).

    Resolution order for each user input name:
    1. a node whose ID equals the name, if that node is a SOURCE node
    2. the alias table ``input_name_to_node``

    Args:
        nodes: Transformed nodes keyed by node ID.
        input_name_to_node: Alias -> node ID table for variable inputs.
        user_inputs: Input name -> CID supplied by the caller.

    Returns:
        One warning string per input name that matched nothing.
    """
    warnings: List[str] = []

    for input_name, cid in user_inputs.items():
        # Try direct node ID match first
        if input_name in nodes:
            node = nodes[input_name]
            if node.get("node_type") == "SOURCE":
                _set_node_cid(node, cid)
                logger.info(f"Bound input {input_name} directly to node, cid={cid[:16]}...")
                continue
            # A non-SOURCE node with this ID falls through to the alias lookup.

        # Try input name lookup
        if input_name in input_name_to_node:
            node_id = input_name_to_node[input_name]
            _set_node_cid(nodes[node_id], cid)
            logger.info(f"Bound input {input_name} via lookup to node {node_id}, cid={cid[:16]}...")
            continue

        # Input not found
        warnings.append(f"Input '{input_name}' not found in recipe")
        logger.warning(f"Input {input_name} not found in nodes or input_name_to_node")

    return warnings
def prepare_dag_for_execution(
    recipe: Recipe,
    user_inputs: Dict[str, str],
) -> Tuple[str, List[str]]:
    """
    Turn a recipe's DAG into the JSON document handed to execution.

    Steps: copy the DAG, convert a list of compiled nodes into a dict of
    transformed nodes (a dict of nodes is used as-is), bind the user's
    input CIDs, rename ``output`` to ``output_id`` and make sure a
    ``metadata`` key exists.

    Returns (dag_json, warnings), where warnings come from ``bind_inputs``.

    Raises:
        ValueError: if the recipe's ``dag`` is missing, empty or not a dict.
    """
    source_dag = recipe.get("dag")
    if not (source_dag and isinstance(source_dag, dict)):
        raise ValueError("Recipe has no DAG definition")

    # JSON round-trip acts as a deep copy so the recipe itself is untouched.
    dag = json.loads(json.dumps(source_dag))
    nodes = dag.get("nodes", {})

    # Registry tables used to resolve asset/effect references.
    registry = recipe.get("registry", {}) or {}
    assets = registry.get("assets", {})
    effects = registry.get("effects", {})

    # A list of compiled nodes becomes {node_id: transformed node};
    # entries without an ID are left out.
    if isinstance(nodes, list):
        nodes = {
            entry.get("id"): transform_node(entry, assets, effects)
            for entry in nodes
            if entry.get("id")
        }
        dag["nodes"] = nodes

    alias_table = build_input_name_mapping(nodes)
    logger.info(f"Input name to node mapping: {alias_table}")
    logger.info(f"User-provided inputs: {user_inputs}")

    warnings = bind_inputs(nodes, alias_table, user_inputs)

    # Debug aid: show what every SOURCE node ended up with.
    for source_id, candidate in nodes.items():
        if candidate.get("node_type") == "SOURCE":
            logger.info(f"Final SOURCE node {source_id}: config={candidate.get('config')}")

    if "output" in dag:
        dag["output_id"] = dag.pop("output")

    dag.setdefault("metadata", {})

    return json.dumps(dag), warnings
@router.post("/upload")
async def upload_recipe(
file: UploadFile = File(...),
@@ -320,117 +495,15 @@ async def run_recipe(
raise HTTPException(404, "Recipe not found")
try:
import json
# Create run using run service
run_service = RunService(database, get_redis_client(), get_cache_manager())
# If recipe has a DAG definition, bind inputs and convert to JSON
recipe_dag = recipe.get("dag")
# Prepare DAG for execution (transform nodes, bind inputs)
dag_json = None
if recipe_dag and isinstance(recipe_dag, dict):
# Bind inputs to the DAG's source nodes
dag_copy = json.loads(json.dumps(recipe_dag)) # Deep copy
nodes = dag_copy.get("nodes", {})
# Get registry for resolving asset/effect references
registry = recipe.get("registry", {})
assets = registry.get("assets", {})
effects = registry.get("effects", {})
# Convert nodes from list to dict if needed, and transform to artdag format
if isinstance(nodes, list):
nodes_dict = {}
for node in nodes:
node_id = node.get("id")
if node_id:
config = node.get("config", {})
# Resolve asset references for SOURCE nodes
if node.get("type") == "SOURCE" and "asset" in config:
asset_name = config["asset"]
if asset_name in assets:
config["cid"] = assets[asset_name].get("cid")
# Resolve effect references for EFFECT nodes
if node.get("type") == "EFFECT" and "effect" in config:
effect_name = config["effect"]
if effect_name in effects:
# Use "cid" - the executor looks for this field
config["cid"] = effects[effect_name].get("cid")
# Transform to artdag format: type->node_type, id->node_id
transformed = {
"node_id": node_id,
"node_type": node.get("type", "EFFECT"),
"config": config,
"inputs": node.get("inputs", []),
"name": node.get("name"),
}
nodes_dict[node_id] = transformed
nodes = nodes_dict
dag_copy["nodes"] = nodes
# Build lookup for variable inputs: map input names to node IDs
# Variable inputs can be referenced by: node_id, config.name, config.input (if string)
input_name_to_node = {}
for node_id, node in nodes.items():
logger.debug(f"Checking node {node_id}: type={node.get('node_type')}, config={node.get('config')}")
if node.get("node_type") == "SOURCE":
config = node.get("config", {})
# Only variable inputs (those with 'input' in config, not fixed assets)
if config.get("input"):
input_name_to_node[node_id] = node_id
# Map by config.name (e.g., "Second Video")
if config.get("name"):
name = config["name"]
input_name_to_node[name] = node_id
# Also allow snake_case version
input_name_to_node[name.lower().replace(" ", "_")] = node_id
input_name_to_node[name.lower().replace(" ", "-")] = node_id
# Map by node.name if available (def binding)
if node.get("name"):
input_name_to_node[node["name"]] = node_id
input_name_to_node[node["name"].replace("-", "_")] = node_id
logger.info(f"Input name to node mapping: {input_name_to_node}")
logger.info(f"User-provided inputs: {req.inputs}")
# Map user-provided input names to content hashes (for variable inputs)
for input_name, cid in req.inputs.items():
# Try direct node ID match first
if input_name in nodes:
node = nodes[input_name]
if node.get("node_type") == "SOURCE":
if "config" not in node:
node["config"] = {}
node["config"]["cid"] = cid
logger.info(f"Bound input {input_name} directly to node, cid={cid[:16]}...")
# Try input name lookup
elif input_name in input_name_to_node:
node_id = input_name_to_node[input_name]
node = nodes[node_id]
if "config" not in node:
node["config"] = {}
node["config"]["cid"] = cid
logger.info(f"Bound input {input_name} via lookup to node {node_id}, cid={cid[:16]}...")
else:
logger.warning(f"Input {input_name} not found in nodes or input_name_to_node")
# Log final DAG nodes for debugging
for nid, n in nodes.items():
if n.get("node_type") == "SOURCE":
logger.info(f"Final SOURCE node {nid}: config={n.get('config')}")
# Transform output to output_id
if "output" in dag_copy:
dag_copy["output_id"] = dag_copy.pop("output")
# Add metadata if not present
if "metadata" not in dag_copy:
dag_copy["metadata"] = {}
dag_json = json.dumps(dag_copy)
if recipe.get("dag"):
dag_json, warnings = prepare_dag_for_execution(recipe, req.inputs)
for warning in warnings:
logger.warning(warning)
run, error = await run_service.create_run(
recipe=recipe_id, # Use recipe hash as primary identifier

160
app/types.py Normal file
View File

@@ -0,0 +1,160 @@
"""
Type definitions for Art DAG L1 server.
Uses TypedDict for configuration structures to enable mypy checking.
"""
from typing import Any, Dict, List, Optional, TypedDict, Union
from typing_extensions import NotRequired
# === Node Config Types ===
class SourceConfig(TypedDict, total=False):
    """Config for SOURCE nodes.

    Declared with ``total=False``, so every key below is optional.
    """
    cid: str  # Content ID (IPFS CID or SHA3-256 hash)
    asset: str  # Asset name from registry
    input: bool  # True if this is a variable input
    name: str  # Human-readable name for variable inputs
    description: str  # Description for variable inputs
class EffectConfig(TypedDict, total=False):
    """Config for EFFECT nodes.

    Declared with ``total=False``, so every key below is optional.
    """
    effect: str  # Effect name
    cid: str  # Effect CID (for cached/IPFS effects)
    # Effect parameters are additional keys
    # NOTE(review): only `intensity` and `level` are declared; a type checker
    # will not accept other parameter keys on this TypedDict.
    intensity: float
    level: float
class SequenceConfig(TypedDict, total=False):
    """Config for SEQUENCE nodes (the single key is optional: ``total=False``)."""
    transition: Dict[str, Any]  # Transition config
class SegmentConfig(TypedDict, total=False):
    """Config for SEGMENT nodes (all keys optional: ``total=False``)."""
    # NOTE(review): units and how start/end/duration interact are not
    # visible here — presumably seconds; confirm against the executor.
    start: float
    end: float
    duration: float
# Union of all config types
# NOTE(review): the trailing Dict[str, Any] member means any string-keyed dict
# satisfies NodeConfig — presumably intentional, to cover node types that have
# no dedicated TypedDict; confirm.
NodeConfig = Union[SourceConfig, EffectConfig, SequenceConfig, SegmentConfig, Dict[str, Any]]
# === Node Types ===
class CompiledNode(TypedDict):
    """Node as produced by the S-expression compiler.

    All keys are required except ``name``.
    """
    id: str
    type: str  # "SOURCE", "EFFECT", "SEQUENCE", etc.
    config: Dict[str, Any]
    inputs: List[str]  # presumably IDs of upstream nodes — TODO confirm
    name: NotRequired[str]
class TransformedNode(TypedDict):
    """Node after transformation for artdag execution.

    Same data as ``CompiledNode`` with ``id``/``type`` carried under
    ``node_id``/``node_type``. All keys are required except ``name``.
    """
    node_id: str
    node_type: str
    config: Dict[str, Any]
    inputs: List[str]
    name: NotRequired[str]
# === DAG Types ===
class CompiledDAG(TypedDict):
    """DAG as produced by the S-expression compiler."""
    nodes: List[CompiledNode]  # nodes as a list; each node carries its own id
    output: str  # presumably the ID of the node whose result is the DAG output — TODO confirm
class TransformedDAG(TypedDict):
    """DAG after transformation for artdag execution.

    Differs from ``CompiledDAG`` in that nodes are keyed by node ID and the
    output reference is stored under ``output_id``.
    """
    nodes: Dict[str, TransformedNode]  # keyed by node ID
    output_id: str
    metadata: NotRequired[Dict[str, Any]]  # optional free-form metadata
# === Registry Types ===
class AssetEntry(TypedDict, total=False):
    """Asset in the recipe registry (all keys optional: ``total=False``)."""
    cid: str  # content ID of the asset
    url: str  # presumably where the asset can be fetched — TODO confirm
class EffectEntry(TypedDict, total=False):
    """Effect in the recipe registry (all keys optional: ``total=False``)."""
    cid: str  # content ID of the effect
    url: str  # presumably where the effect can be fetched — TODO confirm
    temporal: bool  # NOTE(review): meaning not visible here; confirm with the executor
class Registry(TypedDict):
    """Recipe registry containing assets and effects.

    Both keys are required; each maps a name to its registry entry.
    """
    assets: Dict[str, AssetEntry]  # asset name -> entry
    effects: Dict[str, EffectEntry]  # effect name -> entry
# === Recipe Types ===
class Recipe(TypedDict, total=False):
    """Compiled recipe structure.

    Declared with ``total=False``: every key is optional, so readers should
    use ``.get()`` rather than indexing.
    """
    name: str
    version: str
    description: str
    owner: str
    registry: Registry  # named assets/effects the DAG's nodes can reference
    dag: CompiledDAG  # the compiled node graph
    recipe_id: str
    ipfs_cid: str
    sexp: str  # presumably the S-expression source of the recipe — TODO confirm
    step_count: int
    error: str  # NOTE(review): presumably set when compilation/loading failed — confirm
# === API Request/Response Types ===
class RecipeRunInputs(TypedDict):
"""Mapping of input names to CIDs for recipe execution."""
# Keys are input names, values are CIDs
pass # Actually just Dict[str, str]
class RunResult(TypedDict, total=False):
    """Result of a recipe run (all keys optional: ``total=False``)."""
    run_id: str
    status: str  # "pending", "running", "completed", "failed"
    recipe: str
    inputs: List[str]
    output_cid: str
    ipfs_cid: str
    error: str
    created_at: str  # NOTE(review): timestamp format not visible here — confirm
    completed_at: str  # NOTE(review): timestamp format not visible here — confirm
# === Helper predicates and accessors (return plain bool/str; no TypeGuard narrowing) ===
def is_source_node(node: TransformedNode) -> bool:
    """Return True when the node's ``node_type`` is ``"SOURCE"``."""
    node_type = node.get("node_type")
    return node_type == "SOURCE"
def is_effect_node(node: TransformedNode) -> bool:
    """Return True when the node's ``node_type`` is ``"EFFECT"``."""
    node_type = node.get("node_type")
    return node_type == "EFFECT"
def is_variable_input(config: Dict[str, Any]) -> bool:
    """Return True when the config's ``"input"`` value is present and truthy."""
    return True if config.get("input") else False
def get_effect_cid(config: Dict[str, Any]) -> Optional[str]:
    """Return the config's ``"cid"`` if truthy, otherwise its ``"hash"`` value (or None)."""
    primary = config.get("cid")
    if primary:
        return primary
    # Fall back to the 'hash' key when 'cid' is absent or empty.
    return config.get("hash")