Add analysis caching and segment looping for short videos

- Add _cache_analysis_tracks() to cache each analysis track individually
  with content-hash IDs, replacing inline data with cache-id refs
- Add _resolve_analysis_refs() to resolve cache-id refs back to full data
- Add extract_segment_with_loop() helper that detects when output is
  shorter than requested duration and re-runs with -stream_loop -1
- Update COMPOUND handler's FFmpeg and Python paths to use looping
- This fixes videos shorter than audio duration being truncated

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-28 18:51:28 +00:00
parent 45f9171701
commit 17e3e23f06
2 changed files with 1210 additions and 158 deletions

1148
execute.py

File diff suppressed because it is too large. [Load Diff]

View File

@@ -18,6 +18,7 @@ The script:
3. Produce final output
"""
import os
import sys
import json
import tempfile
@@ -30,9 +31,37 @@ from typing import Dict, List, Optional, Any
sys.path.insert(0, str(Path(__file__).parent.parent / "artdag"))
from artdag.sexp import compile_string, parse
from artdag.sexp.parser import Symbol, Keyword
from artdag.sexp.parser import Symbol, Keyword, serialize
from artdag.sexp.planner import create_plan
# Import unified cache
import cache as unified_cache
import hashlib
def _cache_analysis_tracks(plan):
"""Cache each analysis track individually, replace data with cache-id refs."""
import json as _json
for name, data in plan.analysis.items():
json_str = _json.dumps(data, sort_keys=True)
content_cid = hashlib.sha256(json_str.encode()).hexdigest()
unified_cache.cache_store_json(content_cid, data)
plan.analysis[name] = {"_cache_id": content_cid}
def _resolve_analysis_refs(analysis_dict):
"""Resolve cache-id refs back to full analysis data."""
resolved = {}
for name, data in analysis_dict.items():
if isinstance(data, dict) and "_cache_id" in data:
loaded = unified_cache.cache_get_json(data["_cache_id"])
if loaded:
resolved[name] = loaded
else:
resolved[name] = data
return resolved
def run_staged_recipe(
recipe_path: Path,
@@ -40,6 +69,7 @@ def run_staged_recipe(
cache_dir: Optional[Path] = None,
params: Optional[Dict[str, Any]] = None,
verbose: bool = True,
force_replan: bool = False,
) -> Path:
"""
Run a staged recipe with stage-level caching.
@@ -57,21 +87,56 @@ def run_staged_recipe(
recipe_text = recipe_path.read_text()
recipe_dir = recipe_path.parent
# Set up cache directory
if cache_dir is None:
cache_dir = recipe_dir / ".stage_cache"
cache_dir.mkdir(parents=True, exist_ok=True)
# Use unified cache
content_cache_dir = unified_cache.get_content_dir()
def log(msg: str):
if verbose:
print(msg, file=sys.stderr)
# Store recipe source by CID
recipe_cid, _ = unified_cache.content_store_string(recipe_text)
log(f"Recipe CID: {recipe_cid[:16]}...")
# Compile recipe
log(f"Compiling: {recipe_path}")
compiled = compile_string(recipe_text, params)
compiled = compile_string(recipe_text, params, recipe_dir=recipe_dir)
log(f"Recipe: {compiled.name} v{compiled.version}")
log(f"Nodes: {len(compiled.nodes)}")
# Store effects by CID
for effect_name, effect_info in compiled.registry.get("effects", {}).items():
effect_path = effect_info.get("path")
effect_cid = effect_info.get("cid")
if effect_path and effect_cid:
effect_file = Path(effect_path)
if effect_file.exists():
stored_cid, _ = unified_cache.content_store_file(effect_file)
if stored_cid == effect_cid:
log(f"Effect '{effect_name}' CID: {effect_cid[:16]}...")
else:
log(f"Warning: Effect '{effect_name}' CID mismatch")
# Store analyzers by CID
for analyzer_name, analyzer_info in compiled.registry.get("analyzers", {}).items():
analyzer_path = analyzer_info.get("path")
analyzer_cid = analyzer_info.get("cid")
if analyzer_path:
analyzer_file = Path(analyzer_path) if Path(analyzer_path).is_absolute() else recipe_dir / analyzer_path
if analyzer_file.exists():
stored_cid, _ = unified_cache.content_store_file(analyzer_file)
log(f"Analyzer '{analyzer_name}' CID: {stored_cid[:16]}...")
# Store included files by CID
for include_path, include_cid in compiled.registry.get("includes", {}).items():
include_file = Path(include_path)
if include_file.exists():
stored_cid, _ = unified_cache.content_store_file(include_file)
if stored_cid == include_cid:
log(f"Include '{include_file.name}' CID: {include_cid[:16]}...")
else:
log(f"Warning: Include '{include_file.name}' CID mismatch")
# Check for stages
if not compiled.stages:
log("No stages found - running as regular recipe")
@@ -96,6 +161,53 @@ def run_staged_recipe(
times = results.get("times", [])
log(f" Analysis complete: {node_id[:16]}... ({len(times)} times)")
# Check for cached plan using unified cache
plan_cid = unified_cache.plan_exists(recipe_cid, params)
if plan_cid and not force_replan:
plan_cache_path = unified_cache.plan_get_path(recipe_cid, params)
log(f"\nFound cached plan: {plan_cid[:16]}...")
plan_sexp_str = unified_cache.plan_load(recipe_cid, params)
# Parse the cached plan
from execute import parse_plan_input
plan_dict = parse_plan_input(plan_sexp_str)
# Resolve cache-id refs in plan's embedded analysis
if "analysis" in plan_dict:
plan_dict["analysis"] = _resolve_analysis_refs(plan_dict["analysis"])
# Load analysis data from unified cache
analysis_data = {}
for step in plan_dict.get("steps", []):
if step.get("node_type") == "ANALYZE":
step_id = step.get("step_id")
cached_analysis = unified_cache.cache_get_json(step_id)
if cached_analysis:
analysis_data[step_id] = cached_analysis
log(f" Loaded analysis: {step_id[:16]}...")
log(f"Plan ID: {plan_dict.get('plan_id', 'unknown')[:16]}...")
log(f"Steps: {len(plan_dict.get('steps', []))}")
log(f"Analysis tracks: {list(analysis_data.keys())}")
# Execute directly from cached plan
log("\n--- Execution (from cached plan) ---")
from execute import execute_plan
result_path = execute_plan(
plan_path=plan_cache_path,
output_path=output_path,
recipe_dir=recipe_dir,
external_analysis=analysis_data,
cache_dir=content_cache_dir,
)
log(f"\n--- Complete ---")
log(f"Output: {result_path}")
return result_path
# No cached plan - create new one
plan = create_plan(
compiled,
inputs={},
@@ -105,18 +217,29 @@ def run_staged_recipe(
log(f"\nPlan ID: {plan.plan_id[:16]}...")
log(f"Steps: {len(plan.steps)}")
log(f"Analysis tracks: {list(analysis_data.keys())}")
# Cache analysis tracks individually and replace with cache-id refs
_cache_analysis_tracks(plan)
# Save plan to unified cache
plan_sexp_str = plan.to_string(pretty=True)
plan_cache_id, plan_cid, plan_cache_path = unified_cache.plan_store(recipe_cid, params, plan_sexp_str)
log(f"Saved plan: {plan_cache_id[:16]}... → {plan_cid[:16]}...")
# Execute the plan using execute.py logic
log("\n--- Execution ---")
from execute import execute_plan
# Resolve cache-id refs back to full data for execution
resolved_analysis = _resolve_analysis_refs(plan.analysis)
plan_dict = {
"plan_id": plan.plan_id,
"recipe_id": compiled.name,
"recipe_hash": plan.recipe_hash,
"source_hash": plan.source_hash,
"encoding": compiled.encoding,
"output_step_id": plan.output_step_id,
"analysis": analysis_data,
"analysis": {**resolved_analysis, **analysis_data},
"effects_registry": plan.effects_registry,
"minimal_primitives": plan.minimal_primitives,
"steps": [],
@@ -134,16 +257,16 @@ def run_staged_recipe(
# Tag with stage info if present
if step.stage:
step_dict["stage"] = step.stage
step_dict["stage_cache_id"] = step.stage_cache_id
plan_dict["steps"].append(step_dict)
# Execute
# Execute using unified cache
result_path = execute_plan(
plan_path=None,
output_path=output_path,
recipe_dir=recipe_dir,
plan_data=plan_dict,
external_analysis=analysis_data,
cache_dir=content_cache_dir,
)
log(f"\n--- Complete ---")
@@ -162,6 +285,11 @@ def _run_non_staged(compiled, recipe_dir: Path, output_path: Optional[Path], ver
raise NotImplementedError("Non-staged recipes should use plan.py | execute.py")
def list_cache(verbose: bool = False):
    """List all cached items using the unified cache.

    Thin wrapper delegating to the unified cache module's listing
    routine.

    Args:
        verbose: Forwarded positionally to the listing routine —
            presumably enables per-item detail; confirm against
            ``cache.print_cache_listing``.
    """
    unified_cache.print_cache_listing(verbose)
def list_params(recipe_path: Path):
"""List available parameters for a recipe and its effects."""
from artdag.sexp import parse
@@ -283,16 +411,41 @@ Examples:
python3 run_staged.py recipe.sexp -p color_mode=lime -p char_jitter=5
"""
)
parser.add_argument("recipe", type=Path, help="Recipe file (.sexp)")
parser.add_argument("recipe", type=Path, nargs="?", help="Recipe file (.sexp)")
parser.add_argument("-o", "--output", type=Path, help="Output file path")
parser.add_argument("-c", "--cache", type=Path, help="Stage cache directory")
parser.add_argument("-p", "--param", action="append", dest="params",
metavar="KEY=VALUE", help="Set recipe parameter")
parser.add_argument("-q", "--quiet", action="store_true", help="Suppress progress output")
parser.add_argument("--list-params", action="store_true", help="List available parameters and exit")
parser.add_argument("--list-cache", action="store_true", help="List cached items and exit")
parser.add_argument("--no-cache", action="store_true", help="Ignore cached plan, force re-planning")
parser.add_argument("--show-plan", action="store_true", help="Show the plan S-expression and exit (don't execute)")
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
parser.add_argument("-j", "--jobs", type=int, default=None,
help="Max parallel workers (default: 4, or ARTDAG_WORKERS env)")
parser.add_argument("--pipelines", type=int, default=None,
help="Max concurrent video pipelines (default: 1, or ARTDAG_VIDEO_PIPELINES env)")
args = parser.parse_args()
# Apply concurrency limits before any execution
if args.jobs is not None:
os.environ["ARTDAG_WORKERS"] = str(args.jobs)
if args.pipelines is not None:
os.environ["ARTDAG_VIDEO_PIPELINES"] = str(args.pipelines)
from execute import set_max_video_pipelines
set_max_video_pipelines(args.pipelines)
# List cache mode - doesn't require recipe
if args.list_cache:
list_cache(verbose=args.verbose)
sys.exit(0)
# All other modes require a recipe
if not args.recipe:
print("Error: recipe file required", file=sys.stderr)
sys.exit(1)
if not args.recipe.exists():
print(f"Recipe not found: {args.recipe}", file=sys.stderr)
sys.exit(1)
@@ -320,12 +473,51 @@ Examples:
pass # Keep as string
params[key] = value
# Show plan mode - generate plan and display without executing
if args.show_plan:
recipe_text = args.recipe.read_text()
recipe_dir = args.recipe.parent
# Compute recipe CID (content hash)
recipe_cid, _ = unified_cache.content_store_string(recipe_text)
compiled = compile_string(recipe_text, params if params else None, recipe_dir=recipe_dir)
# Check for cached plan using unified cache (keyed by source CID + params)
plan_cid = unified_cache.plan_exists(recipe_cid, params if params else None)
if plan_cid and not args.no_cache:
print(f";; Cached plan CID: {plan_cid}", file=sys.stderr)
plan_sexp_str = unified_cache.plan_load(recipe_cid, params if params else None)
print(plan_sexp_str)
else:
print(f";; Generating new plan...", file=sys.stderr)
analysis_data = {}
def on_analysis(node_id: str, results: dict):
analysis_data[node_id] = results
plan = create_plan(
compiled,
inputs={},
recipe_dir=recipe_dir,
on_analysis=on_analysis,
)
# Cache analysis tracks individually before serialization
_cache_analysis_tracks(plan)
plan_sexp_str = plan.to_string(pretty=True)
# Save to unified cache
cache_id, plan_cid, plan_path = unified_cache.plan_store(recipe_cid, params if params else None, plan_sexp_str)
print(f";; Saved: {cache_id[:16]}... → {plan_cid}", file=sys.stderr)
print(plan_sexp_str)
sys.exit(0)
result = run_staged_recipe(
recipe_path=args.recipe,
output_path=args.output,
cache_dir=args.cache,
params=params if params else None,
verbose=not args.quiet,
force_replan=args.no_cache,
)
# Print final output path