Add testing infrastructure and refactor DAG transformation

Testing setup:
- Add pyproject.toml with mypy and pytest configuration
- Add requirements-dev.txt for development dependencies
- Create tests/ directory with test fixtures
- Add 17 unit tests for DAG transformation pipeline

Type annotations:
- Add app/types.py with TypedDict definitions for node configs
- Add typed helper functions: transform_node, build_input_name_mapping,
  bind_inputs, prepare_dag_for_execution
- Refactor run_recipe to use the new typed helpers

Regression tests for today's bugs:
- test_effect_cid_key_not_effect_hash: Verifies CID uses 'cid' key
- test_source_cid_binding_persists: Verifies bound CIDs in final DAG

Run tests with: pytest tests/ -v

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-12 09:37:06 +00:00
parent 0ba1d6e82d
commit 56009c391d
7 changed files with 996 additions and 110 deletions

View File

@@ -4,8 +4,9 @@ Recipe management routes for L1 server.
Handles recipe upload, listing, viewing, and execution.
"""
import json
import logging
from typing import List, Optional
from typing import Any, Dict, List, Optional, Tuple
from fastapi import APIRouter, Request, Depends, HTTPException, UploadFile, File
from fastapi.responses import HTMLResponse
@@ -18,6 +19,10 @@ from artdag_common.middleware.auth import UserContext
from ..dependencies import require_auth, get_templates, get_redis_client, get_cache_manager
from ..services.auth_service import AuthService
from ..services.recipe_service import RecipeService
from ..types import (
CompiledNode, TransformedNode, Registry, Recipe,
is_variable_input, get_effect_cid,
)
router = APIRouter()
logger = logging.getLogger(__name__)
@@ -30,14 +35,184 @@ class RecipeUploadRequest(BaseModel):
class RecipeRunRequest(BaseModel):
    """Request to run a recipe with variable inputs."""

    # Map input names to CIDs. Declared once: a second `inputs` annotation
    # in the same class body would silently replace the first.
    inputs: Dict[str, str] = {}
def get_recipe_service() -> RecipeService:
    """
    Get recipe service instance.

    Returns:
        A new RecipeService constructed from the results of
        get_redis_client() and get_cache_manager().
    """
    return RecipeService(get_redis_client(), get_cache_manager())
def transform_node(
    node: CompiledNode,
    assets: Dict[str, Dict[str, Any]],
    effects: Dict[str, Dict[str, Any]],
) -> TransformedNode:
    """
    Transform a compiled node to artdag execution format.

    - Resolves asset references to CIDs for SOURCE nodes
    - Resolves effect references to CIDs for EFFECT nodes
    - Renames 'type' to 'node_type', 'id' to 'node_id'

    A node without a 'type' is treated as an EFFECT node consistently: it is
    emitted with node_type "EFFECT" *and* its effect reference is resolved.

    Args:
        node: Compiled node with 'id', 'type', 'config', 'inputs', 'name' keys
            (all optional).
        assets: Registry of asset name -> entry; the entry's 'cid' is used.
        effects: Registry of effect name -> entry; the entry's 'cid' is used.

    Returns:
        A new dict with 'node_id', 'node_type', 'config', 'inputs' and 'name'.
        'config' is a shallow copy, so the caller's node config is not
        modified when the CID is added.
    """
    # Resolve the type once so the lookup below and the emitted node_type
    # can never disagree about what kind of node this is.
    node_type = node.get("type", "EFFECT")

    # Copy to avoid mutating the caller's node; an explicit null config is
    # treated as empty instead of crashing dict(None).
    config = dict(node.get("config") or {})

    # Pick the config key holding the reference and the registry it points into.
    if node_type == "SOURCE":
        ref_key, registry = "asset", assets
    elif node_type == "EFFECT":
        ref_key, registry = "effect", effects
    else:
        ref_key, registry = None, None

    if ref_key is not None and ref_key in config:
        ref_name = config[ref_key]
        # Unknown references are left unresolved (no 'cid' is set).
        if ref_name in registry:
            # The executor-facing key is 'cid' for both assets and effects.
            config["cid"] = registry[ref_name].get("cid")

    return {
        "node_id": node.get("id", ""),
        "node_type": node_type,
        "config": config,
        "inputs": node.get("inputs", []),
        "name": node.get("name"),
    }
def build_input_name_mapping(
    nodes: Dict[str, TransformedNode],
) -> Dict[str, str]:
    """
    Index variable-input SOURCE nodes by every name they can be referenced by.

    Only nodes whose node_type is "SOURCE" and whose config satisfies
    is_variable_input() are indexed. Each such node is reachable through:
    - its node_id
    - config.name as written (e.g., "Second Video")
    - config.name lower-cased with spaces as underscores ("second_video")
    - config.name lower-cased with spaces as hyphens ("second-video")
    - node.name (def binding name), as written and with "-" replaced by "_"

    When two nodes produce the same alias, the node visited later wins.

    Returns:
        Dict mapping each alias to the node ID it refers to.
    """

    def aliases_for(node_id, node, config):
        # Lazy generator: each alias is stored before the next one is derived,
        # which keeps the registration order fixed.
        yield node_id

        label = config.get("name")
        if label:
            yield label
            yield label.lower().replace(" ", "_")
            yield label.lower().replace(" ", "-")

        binding = node.get("name")
        if binding:
            yield binding
            yield binding.replace("-", "_")

    lookup: Dict[str, str] = {}
    for node_id, node in nodes.items():
        if node.get("node_type") == "SOURCE":
            config = node.get("config", {})
            if is_variable_input(config):
                for alias in aliases_for(node_id, node, config):
                    lookup[alias] = node_id
    return lookup
def _ensure_node_config(node: Dict[str, Any]) -> Dict[str, Any]:
    """Return the node's config dict, creating an empty one if missing or null."""
    config = node.get("config")
    if config is None:
        config = node["config"] = {}
    return config


def bind_inputs(
    nodes: Dict[str, TransformedNode],
    input_name_to_node: Dict[str, str],
    user_inputs: Dict[str, str],
) -> List[str]:
    """
    Bind user-provided input CIDs to source nodes.

    For each (input name, CID) pair the target node is found by:
    1. a direct node ID match, if that node is a SOURCE node;
    2. otherwise a lookup in input_name_to_node.
    The CID is written to the node's config under the 'cid' key. Nodes are
    modified in place; a node with no config gets one created, so dict-form
    DAG nodes that never went through transform_node can still be bound.

    Args:
        nodes: Node ID -> node dict; mutated in place.
        input_name_to_node: Alias -> node ID mapping for variable inputs.
        user_inputs: Input name -> CID supplied by the caller.

    Returns:
        List of warnings for inputs that couldn't be bound.
    """
    warnings: List[str] = []

    for input_name, cid in user_inputs.items():
        # Try direct node ID match first
        if input_name in nodes:
            node = nodes[input_name]
            if node.get("node_type") == "SOURCE":
                _ensure_node_config(node)["cid"] = cid
                logger.info(f"Bound input {input_name} directly to node, cid={cid[:16]}...")
                continue
            # A non-SOURCE node with this ID falls through to the alias lookup.

        # Try input name lookup
        if input_name in input_name_to_node:
            node_id = input_name_to_node[input_name]
            _ensure_node_config(nodes[node_id])["cid"] = cid
            logger.info(f"Bound input {input_name} via lookup to node {node_id}, cid={cid[:16]}...")
            continue

        # Input not found: report it to the caller and log it, but keep going
        # so the remaining inputs are still bound.
        warnings.append(f"Input '{input_name}' not found in recipe")
        logger.warning(f"Input {input_name} not found in nodes or input_name_to_node")

    return warnings
def prepare_dag_for_execution(
    recipe: Recipe,
    user_inputs: Dict[str, str],
) -> Tuple[str, List[str]]:
    """
    Prepare a recipe DAG for execution by transforming nodes and binding inputs.

    Steps:
    1. Copy the recipe's DAG so the recipe itself is not modified.
    2. If the DAG's nodes are a list, transform each node that has an 'id'
       with transform_node() and key the result by node ID. Nodes without
       an 'id' are dropped. Nodes already given as a dict are left as-is.
    3. Bind user_inputs onto SOURCE nodes via build_input_name_mapping()
       and bind_inputs().
    4. Rename 'output' to 'output_id' and ensure a 'metadata' key exists.

    Args:
        recipe: Recipe with a 'dag' dict and an optional 'registry' holding
            'assets' and 'effects'.
        user_inputs: Input name -> CID supplied by the caller.

    Returns:
        (dag_json, warnings): the prepared DAG serialized as JSON, and the
        warnings reported by bind_inputs().

    Raises:
        ValueError: If the recipe has no 'dag' or it is not a dict.
    """
    recipe_dag = recipe.get("dag")
    if not recipe_dag or not isinstance(recipe_dag, dict):
        raise ValueError("Recipe has no DAG definition")

    # Deep copy (via a JSON round-trip) to avoid mutating the original
    dag_copy = json.loads(json.dumps(recipe_dag))
    nodes = dag_copy.get("nodes", {})

    # Get registry for resolving references. `or {}` at every level so a
    # missing or null registry / assets / effects entry behaves like an empty
    # one instead of reaching transform_node as None.
    registry = recipe.get("registry") or {}
    assets = registry.get("assets") or {}
    effects = registry.get("effects") or {}

    # Transform nodes from list to dict if needed
    if isinstance(nodes, list):
        nodes_dict: Dict[str, TransformedNode] = {
            node.get("id"): transform_node(node, assets, effects)
            for node in nodes
            if node.get("id")
        }
        nodes = nodes_dict
        dag_copy["nodes"] = nodes

    # Build input name mapping and bind user inputs
    input_name_to_node = build_input_name_mapping(nodes)
    logger.info(f"Input name to node mapping: {input_name_to_node}")
    logger.info(f"User-provided inputs: {user_inputs}")

    warnings = bind_inputs(nodes, input_name_to_node, user_inputs)

    # Log final SOURCE node configs for debugging
    for nid, n in nodes.items():
        if n.get("node_type") == "SOURCE":
            logger.info(f"Final SOURCE node {nid}: config={n.get('config')}")

    # Transform output to output_id
    if "output" in dag_copy:
        dag_copy["output_id"] = dag_copy.pop("output")

    # Add metadata if not present
    if "metadata" not in dag_copy:
        dag_copy["metadata"] = {}

    return json.dumps(dag_copy), warnings
@router.post("/upload")
async def upload_recipe(
file: UploadFile = File(...),
@@ -320,117 +495,15 @@ async def run_recipe(
raise HTTPException(404, "Recipe not found")
try:
import json
# Create run using run service
run_service = RunService(database, get_redis_client(), get_cache_manager())
# If recipe has a DAG definition, bind inputs and convert to JSON
recipe_dag = recipe.get("dag")
# Prepare DAG for execution (transform nodes, bind inputs)
dag_json = None
if recipe_dag and isinstance(recipe_dag, dict):
# Bind inputs to the DAG's source nodes
dag_copy = json.loads(json.dumps(recipe_dag)) # Deep copy
nodes = dag_copy.get("nodes", {})
# Get registry for resolving asset/effect references
registry = recipe.get("registry", {})
assets = registry.get("assets", {})
effects = registry.get("effects", {})
# Convert nodes from list to dict if needed, and transform to artdag format
if isinstance(nodes, list):
nodes_dict = {}
for node in nodes:
node_id = node.get("id")
if node_id:
config = node.get("config", {})
# Resolve asset references for SOURCE nodes
if node.get("type") == "SOURCE" and "asset" in config:
asset_name = config["asset"]
if asset_name in assets:
config["cid"] = assets[asset_name].get("cid")
# Resolve effect references for EFFECT nodes
if node.get("type") == "EFFECT" and "effect" in config:
effect_name = config["effect"]
if effect_name in effects:
# Use "cid" - the executor looks for this field
config["cid"] = effects[effect_name].get("cid")
# Transform to artdag format: type->node_type, id->node_id
transformed = {
"node_id": node_id,
"node_type": node.get("type", "EFFECT"),
"config": config,
"inputs": node.get("inputs", []),
"name": node.get("name"),
}
nodes_dict[node_id] = transformed
nodes = nodes_dict
dag_copy["nodes"] = nodes
# Build lookup for variable inputs: map input names to node IDs
# Variable inputs can be referenced by: node_id, config.name, config.input (if string)
input_name_to_node = {}
for node_id, node in nodes.items():
logger.debug(f"Checking node {node_id}: type={node.get('node_type')}, config={node.get('config')}")
if node.get("node_type") == "SOURCE":
config = node.get("config", {})
# Only variable inputs (those with 'input' in config, not fixed assets)
if config.get("input"):
input_name_to_node[node_id] = node_id
# Map by config.name (e.g., "Second Video")
if config.get("name"):
name = config["name"]
input_name_to_node[name] = node_id
# Also allow snake_case version
input_name_to_node[name.lower().replace(" ", "_")] = node_id
input_name_to_node[name.lower().replace(" ", "-")] = node_id
# Map by node.name if available (def binding)
if node.get("name"):
input_name_to_node[node["name"]] = node_id
input_name_to_node[node["name"].replace("-", "_")] = node_id
logger.info(f"Input name to node mapping: {input_name_to_node}")
logger.info(f"User-provided inputs: {req.inputs}")
# Map user-provided input names to content hashes (for variable inputs)
for input_name, cid in req.inputs.items():
# Try direct node ID match first
if input_name in nodes:
node = nodes[input_name]
if node.get("node_type") == "SOURCE":
if "config" not in node:
node["config"] = {}
node["config"]["cid"] = cid
logger.info(f"Bound input {input_name} directly to node, cid={cid[:16]}...")
# Try input name lookup
elif input_name in input_name_to_node:
node_id = input_name_to_node[input_name]
node = nodes[node_id]
if "config" not in node:
node["config"] = {}
node["config"]["cid"] = cid
logger.info(f"Bound input {input_name} via lookup to node {node_id}, cid={cid[:16]}...")
else:
logger.warning(f"Input {input_name} not found in nodes or input_name_to_node")
# Log final DAG nodes for debugging
for nid, n in nodes.items():
if n.get("node_type") == "SOURCE":
logger.info(f"Final SOURCE node {nid}: config={n.get('config')}")
# Transform output to output_id
if "output" in dag_copy:
dag_copy["output_id"] = dag_copy.pop("output")
# Add metadata if not present
if "metadata" not in dag_copy:
dag_copy["metadata"] = {}
dag_json = json.dumps(dag_copy)
if recipe.get("dag"):
dag_json, warnings = prepare_dag_for_execution(recipe, req.inputs)
for warning in warnings:
logger.warning(warning)
run, error = await run_service.create_run(
recipe=recipe_id, # Use recipe hash as primary identifier