Add analysis caching and segment looping for short videos
- Add _cache_analysis_tracks() to cache each analysis track individually with content-hash IDs, replacing inline data with cache-id refs.
- Add _resolve_analysis_refs() to resolve cache-id refs back to full data.
- Add extract_segment_with_loop() helper that detects when output is shorter than the requested duration and re-runs with -stream_loop -1.
- Update COMPOUND handler's FFmpeg and Python paths to use looping.
- This fixes videos shorter than the audio duration being truncated.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
1148
execute.py
1148
execute.py
File diff suppressed because it is too large
Load Diff
220
run_staged.py
220
run_staged.py
@@ -18,6 +18,7 @@ The script:
|
||||
3. Produce final output
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import tempfile
|
||||
@@ -30,9 +31,37 @@ from typing import Dict, List, Optional, Any
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "artdag"))
|
||||
|
||||
from artdag.sexp import compile_string, parse
|
||||
from artdag.sexp.parser import Symbol, Keyword
|
||||
from artdag.sexp.parser import Symbol, Keyword, serialize
|
||||
from artdag.sexp.planner import create_plan
|
||||
|
||||
# Import unified cache
|
||||
import cache as unified_cache
|
||||
|
||||
import hashlib
|
||||
|
||||
|
||||
def _cache_analysis_tracks(plan):
    """Cache each analysis track individually, replacing inline data with cache-id refs.

    Mutates ``plan.analysis`` in place: each track's payload is serialized to
    canonical JSON, stored in the unified cache under its content hash, and
    replaced by a ``{"_cache_id": <sha256 hexdigest>}`` reference.
    """
    # NOTE: reassigning existing keys while iterating is safe in Python —
    # the dict's key set does not change during the loop.
    for name, data in plan.analysis.items():
        # sort_keys=True yields a canonical serialization, so identical data
        # always produces the same content id.  (Uses the module-level `json`
        # import; the original shadowed it with a redundant local import.)
        json_str = json.dumps(data, sort_keys=True)
        content_cid = hashlib.sha256(json_str.encode()).hexdigest()
        unified_cache.cache_store_json(content_cid, data)
        plan.analysis[name] = {"_cache_id": content_cid}
|
||||
|
||||
|
||||
def _resolve_analysis_refs(analysis_dict):
    """Resolve cache-id refs back to full analysis data.

    Inverse of ``_cache_analysis_tracks``: entries of the form
    ``{"_cache_id": cid}`` are replaced by the cached JSON payload.
    Non-ref entries, and refs whose cache lookup misses, are passed
    through unchanged.

    Returns a new dict; the input is not mutated.
    """
    resolved = {}
    for name, data in analysis_dict.items():
        if isinstance(data, dict) and "_cache_id" in data:
            loaded = unified_cache.cache_get_json(data["_cache_id"])
            # Bug fix: compare against None explicitly.  The original used
            # `if loaded:`, which treats a cached-but-falsy payload
            # ({} / [] / 0) as a cache miss and leaves the ref unresolved.
            if loaded is not None:
                resolved[name] = loaded
            else:
                resolved[name] = data
        else:
            resolved[name] = data
    return resolved
|
||||
|
||||
|
||||
def run_staged_recipe(
|
||||
recipe_path: Path,
|
||||
@@ -40,6 +69,7 @@ def run_staged_recipe(
|
||||
cache_dir: Optional[Path] = None,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
verbose: bool = True,
|
||||
force_replan: bool = False,
|
||||
) -> Path:
|
||||
"""
|
||||
Run a staged recipe with stage-level caching.
|
||||
@@ -57,21 +87,56 @@ def run_staged_recipe(
|
||||
recipe_text = recipe_path.read_text()
|
||||
recipe_dir = recipe_path.parent
|
||||
|
||||
# Set up cache directory
|
||||
if cache_dir is None:
|
||||
cache_dir = recipe_dir / ".stage_cache"
|
||||
cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
# Use unified cache
|
||||
content_cache_dir = unified_cache.get_content_dir()
|
||||
|
||||
def log(msg: str):
|
||||
if verbose:
|
||||
print(msg, file=sys.stderr)
|
||||
|
||||
# Store recipe source by CID
|
||||
recipe_cid, _ = unified_cache.content_store_string(recipe_text)
|
||||
log(f"Recipe CID: {recipe_cid[:16]}...")
|
||||
|
||||
# Compile recipe
|
||||
log(f"Compiling: {recipe_path}")
|
||||
compiled = compile_string(recipe_text, params)
|
||||
compiled = compile_string(recipe_text, params, recipe_dir=recipe_dir)
|
||||
log(f"Recipe: {compiled.name} v{compiled.version}")
|
||||
log(f"Nodes: {len(compiled.nodes)}")
|
||||
|
||||
# Store effects by CID
|
||||
for effect_name, effect_info in compiled.registry.get("effects", {}).items():
|
||||
effect_path = effect_info.get("path")
|
||||
effect_cid = effect_info.get("cid")
|
||||
if effect_path and effect_cid:
|
||||
effect_file = Path(effect_path)
|
||||
if effect_file.exists():
|
||||
stored_cid, _ = unified_cache.content_store_file(effect_file)
|
||||
if stored_cid == effect_cid:
|
||||
log(f"Effect '{effect_name}' CID: {effect_cid[:16]}...")
|
||||
else:
|
||||
log(f"Warning: Effect '{effect_name}' CID mismatch")
|
||||
|
||||
# Store analyzers by CID
|
||||
for analyzer_name, analyzer_info in compiled.registry.get("analyzers", {}).items():
|
||||
analyzer_path = analyzer_info.get("path")
|
||||
analyzer_cid = analyzer_info.get("cid")
|
||||
if analyzer_path:
|
||||
analyzer_file = Path(analyzer_path) if Path(analyzer_path).is_absolute() else recipe_dir / analyzer_path
|
||||
if analyzer_file.exists():
|
||||
stored_cid, _ = unified_cache.content_store_file(analyzer_file)
|
||||
log(f"Analyzer '{analyzer_name}' CID: {stored_cid[:16]}...")
|
||||
|
||||
# Store included files by CID
|
||||
for include_path, include_cid in compiled.registry.get("includes", {}).items():
|
||||
include_file = Path(include_path)
|
||||
if include_file.exists():
|
||||
stored_cid, _ = unified_cache.content_store_file(include_file)
|
||||
if stored_cid == include_cid:
|
||||
log(f"Include '{include_file.name}' CID: {include_cid[:16]}...")
|
||||
else:
|
||||
log(f"Warning: Include '{include_file.name}' CID mismatch")
|
||||
|
||||
# Check for stages
|
||||
if not compiled.stages:
|
||||
log("No stages found - running as regular recipe")
|
||||
@@ -96,6 +161,53 @@ def run_staged_recipe(
|
||||
times = results.get("times", [])
|
||||
log(f" Analysis complete: {node_id[:16]}... ({len(times)} times)")
|
||||
|
||||
# Check for cached plan using unified cache
|
||||
plan_cid = unified_cache.plan_exists(recipe_cid, params)
|
||||
|
||||
if plan_cid and not force_replan:
|
||||
plan_cache_path = unified_cache.plan_get_path(recipe_cid, params)
|
||||
log(f"\nFound cached plan: {plan_cid[:16]}...")
|
||||
plan_sexp_str = unified_cache.plan_load(recipe_cid, params)
|
||||
|
||||
# Parse the cached plan
|
||||
from execute import parse_plan_input
|
||||
plan_dict = parse_plan_input(plan_sexp_str)
|
||||
|
||||
# Resolve cache-id refs in plan's embedded analysis
|
||||
if "analysis" in plan_dict:
|
||||
plan_dict["analysis"] = _resolve_analysis_refs(plan_dict["analysis"])
|
||||
|
||||
# Load analysis data from unified cache
|
||||
analysis_data = {}
|
||||
for step in plan_dict.get("steps", []):
|
||||
if step.get("node_type") == "ANALYZE":
|
||||
step_id = step.get("step_id")
|
||||
cached_analysis = unified_cache.cache_get_json(step_id)
|
||||
if cached_analysis:
|
||||
analysis_data[step_id] = cached_analysis
|
||||
log(f" Loaded analysis: {step_id[:16]}...")
|
||||
|
||||
log(f"Plan ID: {plan_dict.get('plan_id', 'unknown')[:16]}...")
|
||||
log(f"Steps: {len(plan_dict.get('steps', []))}")
|
||||
log(f"Analysis tracks: {list(analysis_data.keys())}")
|
||||
|
||||
# Execute directly from cached plan
|
||||
log("\n--- Execution (from cached plan) ---")
|
||||
from execute import execute_plan
|
||||
|
||||
result_path = execute_plan(
|
||||
plan_path=plan_cache_path,
|
||||
output_path=output_path,
|
||||
recipe_dir=recipe_dir,
|
||||
external_analysis=analysis_data,
|
||||
cache_dir=content_cache_dir,
|
||||
)
|
||||
|
||||
log(f"\n--- Complete ---")
|
||||
log(f"Output: {result_path}")
|
||||
return result_path
|
||||
|
||||
# No cached plan - create new one
|
||||
plan = create_plan(
|
||||
compiled,
|
||||
inputs={},
|
||||
@@ -105,18 +217,29 @@ def run_staged_recipe(
|
||||
|
||||
log(f"\nPlan ID: {plan.plan_id[:16]}...")
|
||||
log(f"Steps: {len(plan.steps)}")
|
||||
log(f"Analysis tracks: {list(analysis_data.keys())}")
|
||||
|
||||
# Cache analysis tracks individually and replace with cache-id refs
|
||||
_cache_analysis_tracks(plan)
|
||||
|
||||
# Save plan to unified cache
|
||||
plan_sexp_str = plan.to_string(pretty=True)
|
||||
plan_cache_id, plan_cid, plan_cache_path = unified_cache.plan_store(recipe_cid, params, plan_sexp_str)
|
||||
log(f"Saved plan: {plan_cache_id[:16]}... → {plan_cid[:16]}...")
|
||||
|
||||
# Execute the plan using execute.py logic
|
||||
log("\n--- Execution ---")
|
||||
from execute import execute_plan
|
||||
|
||||
# Resolve cache-id refs back to full data for execution
|
||||
resolved_analysis = _resolve_analysis_refs(plan.analysis)
|
||||
|
||||
plan_dict = {
|
||||
"plan_id": plan.plan_id,
|
||||
"recipe_id": compiled.name,
|
||||
"recipe_hash": plan.recipe_hash,
|
||||
"source_hash": plan.source_hash,
|
||||
"encoding": compiled.encoding,
|
||||
"output_step_id": plan.output_step_id,
|
||||
"analysis": analysis_data,
|
||||
"analysis": {**resolved_analysis, **analysis_data},
|
||||
"effects_registry": plan.effects_registry,
|
||||
"minimal_primitives": plan.minimal_primitives,
|
||||
"steps": [],
|
||||
@@ -134,16 +257,16 @@ def run_staged_recipe(
|
||||
# Tag with stage info if present
|
||||
if step.stage:
|
||||
step_dict["stage"] = step.stage
|
||||
step_dict["stage_cache_id"] = step.stage_cache_id
|
||||
plan_dict["steps"].append(step_dict)
|
||||
|
||||
# Execute
|
||||
# Execute using unified cache
|
||||
result_path = execute_plan(
|
||||
plan_path=None,
|
||||
output_path=output_path,
|
||||
recipe_dir=recipe_dir,
|
||||
plan_data=plan_dict,
|
||||
external_analysis=analysis_data,
|
||||
cache_dir=content_cache_dir,
|
||||
)
|
||||
|
||||
log(f"\n--- Complete ---")
|
||||
@@ -162,6 +285,11 @@ def _run_non_staged(compiled, recipe_dir: Path, output_path: Optional[Path], ver
|
||||
raise NotImplementedError("Non-staged recipes should use plan.py | execute.py")
|
||||
|
||||
|
||||
def list_cache(verbose: bool = False):
|
||||
"""List all cached items using the unified cache."""
|
||||
unified_cache.print_cache_listing(verbose)
|
||||
|
||||
|
||||
def list_params(recipe_path: Path):
|
||||
"""List available parameters for a recipe and its effects."""
|
||||
from artdag.sexp import parse
|
||||
@@ -283,16 +411,41 @@ Examples:
|
||||
python3 run_staged.py recipe.sexp -p color_mode=lime -p char_jitter=5
|
||||
"""
|
||||
)
|
||||
parser.add_argument("recipe", type=Path, help="Recipe file (.sexp)")
|
||||
parser.add_argument("recipe", type=Path, nargs="?", help="Recipe file (.sexp)")
|
||||
parser.add_argument("-o", "--output", type=Path, help="Output file path")
|
||||
parser.add_argument("-c", "--cache", type=Path, help="Stage cache directory")
|
||||
parser.add_argument("-p", "--param", action="append", dest="params",
|
||||
metavar="KEY=VALUE", help="Set recipe parameter")
|
||||
parser.add_argument("-q", "--quiet", action="store_true", help="Suppress progress output")
|
||||
parser.add_argument("--list-params", action="store_true", help="List available parameters and exit")
|
||||
parser.add_argument("--list-cache", action="store_true", help="List cached items and exit")
|
||||
parser.add_argument("--no-cache", action="store_true", help="Ignore cached plan, force re-planning")
|
||||
parser.add_argument("--show-plan", action="store_true", help="Show the plan S-expression and exit (don't execute)")
|
||||
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
|
||||
parser.add_argument("-j", "--jobs", type=int, default=None,
|
||||
help="Max parallel workers (default: 4, or ARTDAG_WORKERS env)")
|
||||
parser.add_argument("--pipelines", type=int, default=None,
|
||||
help="Max concurrent video pipelines (default: 1, or ARTDAG_VIDEO_PIPELINES env)")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Apply concurrency limits before any execution
|
||||
if args.jobs is not None:
|
||||
os.environ["ARTDAG_WORKERS"] = str(args.jobs)
|
||||
if args.pipelines is not None:
|
||||
os.environ["ARTDAG_VIDEO_PIPELINES"] = str(args.pipelines)
|
||||
from execute import set_max_video_pipelines
|
||||
set_max_video_pipelines(args.pipelines)
|
||||
|
||||
# List cache mode - doesn't require recipe
|
||||
if args.list_cache:
|
||||
list_cache(verbose=args.verbose)
|
||||
sys.exit(0)
|
||||
|
||||
# All other modes require a recipe
|
||||
if not args.recipe:
|
||||
print("Error: recipe file required", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if not args.recipe.exists():
|
||||
print(f"Recipe not found: {args.recipe}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
@@ -320,12 +473,51 @@ Examples:
|
||||
pass # Keep as string
|
||||
params[key] = value
|
||||
|
||||
# Show plan mode - generate plan and display without executing
|
||||
if args.show_plan:
|
||||
recipe_text = args.recipe.read_text()
|
||||
recipe_dir = args.recipe.parent
|
||||
|
||||
# Compute recipe CID (content hash)
|
||||
recipe_cid, _ = unified_cache.content_store_string(recipe_text)
|
||||
|
||||
compiled = compile_string(recipe_text, params if params else None, recipe_dir=recipe_dir)
|
||||
|
||||
# Check for cached plan using unified cache (keyed by source CID + params)
|
||||
plan_cid = unified_cache.plan_exists(recipe_cid, params if params else None)
|
||||
|
||||
if plan_cid and not args.no_cache:
|
||||
print(f";; Cached plan CID: {plan_cid}", file=sys.stderr)
|
||||
plan_sexp_str = unified_cache.plan_load(recipe_cid, params if params else None)
|
||||
print(plan_sexp_str)
|
||||
else:
|
||||
print(f";; Generating new plan...", file=sys.stderr)
|
||||
analysis_data = {}
|
||||
def on_analysis(node_id: str, results: dict):
|
||||
analysis_data[node_id] = results
|
||||
|
||||
plan = create_plan(
|
||||
compiled,
|
||||
inputs={},
|
||||
recipe_dir=recipe_dir,
|
||||
on_analysis=on_analysis,
|
||||
)
|
||||
# Cache analysis tracks individually before serialization
|
||||
_cache_analysis_tracks(plan)
|
||||
plan_sexp_str = plan.to_string(pretty=True)
|
||||
|
||||
# Save to unified cache
|
||||
cache_id, plan_cid, plan_path = unified_cache.plan_store(recipe_cid, params if params else None, plan_sexp_str)
|
||||
print(f";; Saved: {cache_id[:16]}... → {plan_cid}", file=sys.stderr)
|
||||
print(plan_sexp_str)
|
||||
sys.exit(0)
|
||||
|
||||
result = run_staged_recipe(
|
||||
recipe_path=args.recipe,
|
||||
output_path=args.output,
|
||||
cache_dir=args.cache,
|
||||
params=params if params else None,
|
||||
verbose=not args.quiet,
|
||||
force_replan=args.no_cache,
|
||||
)
|
||||
|
||||
# Print final output path
|
||||
|
||||
Reference in New Issue
Block a user