Add multi-step DAG execution support

tasks.py:
- Import artdag DAG, Node, Engine, Executor
- Register executors for effect:dog, effect:identity, and SOURCE
- Add execute_dag task for running full DAG workflows
- Add build_effect_dag helper for simple effect-to-DAG conversion

server.py:
- Add use_dag and dag_json fields to RunRequest
- Update create_run to support DAG mode
- Handle both legacy render_effect and new execute_dag result formats
- Import new tasks (execute_dag, build_effect_dag)

The DAG engine executes nodes in topological order with automatic
result caching, enabling multi-step pipelines such as:
source -> effect1 -> effect2 -> output.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-08 01:45:29 +00:00
parent 034c7542c4
commit ff195a7ce5
2 changed files with 238 additions and 25 deletions

View File

@@ -26,7 +26,7 @@ import requests as http_requests
from urllib.parse import urlparse
from celery_app import app as celery_app
from tasks import render_effect
from tasks import render_effect, execute_dag, build_effect_dag
from cache_manager import L1CacheManager, get_cache_manager
# L2 server for auth verification
@@ -98,9 +98,11 @@ app = FastAPI(
class RunRequest(BaseModel):
"""Request to start a run."""
recipe: str # Recipe name (e.g., "dog", "identity")
recipe: str # Recipe name (e.g., "dog", "identity") or "dag" for custom DAG
inputs: list[str] # List of content hashes
output_name: Optional[str] = None
use_dag: bool = False # Use DAG engine instead of legacy effect runner
dag_json: Optional[str] = None # Custom DAG JSON (required if recipe="dag")
class RunStatus(BaseModel):
@@ -301,13 +303,25 @@ async def create_run(request: RunRequest, username: str = Depends(get_required_u
)
# Submit to Celery
# For now, we only support single-input recipes
if len(request.inputs) != 1:
raise HTTPException(400, "Currently only single-input recipes supported")
if request.use_dag or request.recipe == "dag":
# DAG mode - use artdag engine
if request.dag_json:
# Custom DAG provided
dag_json = request.dag_json
else:
# Build simple effect DAG from recipe and inputs
dag = build_effect_dag(request.inputs, request.recipe)
dag_json = dag.to_json()
input_hash = request.inputs[0]
task = execute_dag.delay(dag_json, run.run_id)
else:
# Legacy mode - single effect
if len(request.inputs) != 1:
raise HTTPException(400, "Legacy mode only supports single-input recipes. Use use_dag=true for multi-input.")
input_hash = request.inputs[0]
task = render_effect.delay(input_hash, request.recipe, output_name)
task = render_effect.delay(input_hash, request.recipe, output_name)
run.celery_task_id = task.id
run.status = "running"
@@ -331,29 +345,37 @@ async def get_run(run_id: str):
result = task.result
run.status = "completed"
run.completed_at = datetime.now(timezone.utc).isoformat()
run.output_hash = result.get("output", {}).get("content_hash")
# Extract effects info from provenance
effects = result.get("effects", [])
if effects:
run.effects_commit = effects[0].get("repo_commit")
run.effect_url = effects[0].get("repo_url")
# Handle both legacy (render_effect) and new (execute_dag) result formats
if "output_hash" in result:
# New DAG result format
run.output_hash = result.get("output_hash")
output_path = Path(result.get("output_path", "")) if result.get("output_path") else None
else:
# Legacy render_effect format
run.output_hash = result.get("output", {}).get("content_hash")
output_path = Path(result.get("output", {}).get("local_path", ""))
# Extract infrastructure info
run.infrastructure = result.get("infrastructure")
# Extract effects info from provenance (legacy only)
effects = result.get("effects", [])
if effects:
run.effects_commit = effects[0].get("repo_commit")
run.effect_url = effects[0].get("repo_url")
# Cache the output
output_path = Path(result.get("output", {}).get("local_path", ""))
if output_path.exists():
# Extract infrastructure info (legacy only)
run.infrastructure = result.get("infrastructure")
# Cache the output (legacy mode - DAG already caches via cache_manager)
if output_path and output_path.exists() and "output_hash" not in result:
cache_file(output_path, node_type="effect_output")
# Record activity for deletion tracking
if run.output_hash and run.inputs:
cache_manager.record_simple_activity(
input_hashes=run.inputs,
output_hash=run.output_hash,
run_id=run.run_id,
)
# Record activity for deletion tracking (legacy mode)
if run.output_hash and run.inputs:
cache_manager.record_simple_activity(
input_hashes=run.inputs,
output_hash=run.output_hash,
run_id=run.run_id,
)
else:
run.status = "failed"
run.error = str(task.result)