Add streaming video compositor with sexp interpreter

- New streaming/ module for real-time video processing:
  - compositor.py: Main streaming compositor with cycle-crossfade
  - sexp_executor.py: Executes compiled sexp recipes in real-time
  - sexp_interp.py: Full S-expression interpreter for SLICE_ON Lambda
  - recipe_adapter.py: Bridges recipes to streaming compositor
  - sources.py: Video source with ffmpeg streaming
  - audio.py: Real-time audio analysis (energy, beats)
  - output.py: Preview (mpv) and file output with audio muxing

- New templates/:
  - cycle-crossfade.sexp: Smooth zoom-based video cycling
  - process-pair.sexp: Dual-clip processing with effects

- Key features:
  - Videos cycle in input-videos order (not definition order)
  - Cumulative whole-spin rotation
  - Zero-weight sources skip processing
  - Live audio-reactive effects

- New effects: blend_multi for weighted layer compositing
- Updated primitives and interpreter for streaming compatibility

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-29 01:27:39 +00:00
parent 17e3e23f06
commit d241e2a663
31 changed files with 5143 additions and 96 deletions

404
cache.py Normal file
View File

@@ -0,0 +1,404 @@
#!/usr/bin/env python3
"""
Unified content cache for artdag.
Design:
- IPNS (cache_id) = computation hash, known BEFORE execution
"What would be the result of running X with inputs Y?"
- CID = content hash, known AFTER execution
"What is this actual content?"
Structure:
.cache/
refs/ # IPNS → CID mappings (computation → result)
{cache_id} # Text file containing the CID of the result
content/ # Content-addressed storage
{cid[:2]}/{cid} # Actual content by CID
"""
import hashlib
import json
import os
from pathlib import Path
from typing import Optional, Dict, Any, Tuple
# Default cache location - can be overridden via ARTDAG_CACHE env var
DEFAULT_CACHE_DIR = Path(__file__).parent / ".cache"


def get_cache_dir() -> Path:
    """Return the cache root directory, creating it on first use.

    The ARTDAG_CACHE environment variable overrides the default
    location next to this module.
    """
    root = Path(os.environ.get("ARTDAG_CACHE", DEFAULT_CACHE_DIR))
    root.mkdir(parents=True, exist_ok=True)
    return root
def get_refs_dir() -> Path:
    """Return (and create) the refs/ directory of IPNS → CID mappings."""
    path = get_cache_dir() / "refs"
    path.mkdir(parents=True, exist_ok=True)
    return path
def get_content_dir() -> Path:
    """Return (and create) the content/ directory of CID-addressed blobs."""
    path = get_cache_dir() / "content"
    path.mkdir(parents=True, exist_ok=True)
    return path
# =============================================================================
# CID (Content Hash) Operations
# =============================================================================
def compute_cid(content: bytes) -> str:
    """Return the hex SHA-256 digest of *content*, used as its content ID."""
    digest = hashlib.sha256()
    digest.update(content)
    return digest.hexdigest()
def compute_file_cid(file_path: Path) -> str:
    """Compute the content ID (hex SHA-256) of a file.

    Hashes in fixed-size chunks so large media files are digested
    without loading the whole file into memory (the original read
    the entire file into one bytes object).
    """
    hasher = hashlib.sha256()
    with open(file_path, 'rb') as f:
        # 1 MiB chunks; iter() stops at the b'' sentinel on EOF.
        for chunk in iter(lambda: f.read(1 << 20), b''):
            hasher.update(chunk)
    return hasher.hexdigest()
def compute_string_cid(text: str) -> str:
    """Content ID for a string (UTF-8 bytes hashed with SHA-256)."""
    encoded = text.encode('utf-8')
    return compute_cid(encoded)
# =============================================================================
# Content Storage (by CID)
# =============================================================================
def _content_path(cid: str) -> Path:
    """Path under content/ for *cid*, sharded by its first two hex chars."""
    shard = cid[:2]
    return get_content_dir() / shard / cid
def content_exists_by_cid(cid: str) -> Optional[Path]:
    """Return the stored path for *cid*, or None if absent or zero-length."""
    candidate = _content_path(cid)
    if not candidate.exists():
        return None
    # A zero-byte file is treated as a miss (likely a failed write).
    return candidate if candidate.stat().st_size > 0 else None
def content_store_by_cid(cid: str, content: bytes) -> Path:
    """Store *content* at its content-addressed path and return that path.

    Writes to a temporary file in the target directory and atomically
    renames it into place with os.replace, so a crash or concurrent
    reader never observes a partially written blob (the original
    wrote directly to the final path).
    """
    path = _content_path(cid)
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_name(path.name + ".tmp")
    tmp.write_bytes(content)
    os.replace(tmp, path)  # atomic on POSIX and Windows
    return path
def content_store_file(file_path: Path) -> Tuple[str, Path]:
    """Store a file's bytes under their content hash. Returns (cid, path)."""
    data = file_path.read_bytes()
    digest = compute_cid(data)
    return digest, content_store_by_cid(digest, data)
def content_store_string(text: str) -> Tuple[str, Path]:
    """Store a UTF-8 string under its content hash. Returns (cid, path)."""
    data = text.encode('utf-8')
    digest = compute_cid(data)
    return digest, content_store_by_cid(digest, data)
def content_get(cid: str) -> Optional[bytes]:
    """Return the stored bytes for *cid*, or None when not cached."""
    path = content_exists_by_cid(cid)
    return path.read_bytes() if path else None
def content_get_string(cid: str) -> Optional[str]:
    """Return the stored text for *cid*, or None when not cached.

    Uses an explicit None check instead of truthiness so that empty
    content (should it ever be stored) decodes to "" rather than
    being misreported as a cache miss.
    """
    content = content_get(cid)
    if content is not None:
        return content.decode('utf-8')
    return None
# =============================================================================
# Refs (IPNS → CID mappings)
# =============================================================================
def _ref_path(cache_id: str) -> Path:
    """Location of the ref file for *cache_id* (flat layout, no sharding)."""
    return get_refs_dir() / cache_id
def ref_exists(cache_id: str) -> Optional[str]:
    """Return the CID a ref points to, or None when no ref is recorded."""
    ref = _ref_path(cache_id)
    return ref.read_text().strip() if ref.exists() else None
def ref_set(cache_id: str, cid: str) -> Path:
    """Record the IPNS → CID mapping for *cache_id*; returns the ref path."""
    ref = _ref_path(cache_id)
    ref.parent.mkdir(parents=True, exist_ok=True)
    ref.write_text(cid)
    return ref
def ref_get_content(cache_id: str) -> Optional[bytes]:
    """Resolve *cache_id* to its CID, then fetch the content bytes."""
    cid = ref_exists(cache_id)
    return content_get(cid) if cid else None
def ref_get_string(cache_id: str) -> Optional[str]:
    """Return cached text for *cache_id*, or None on miss.

    Uses an explicit None check instead of truthiness so that empty
    cached content decodes to "" instead of being reported as a miss.
    """
    content = ref_get_content(cache_id)
    if content is not None:
        return content.decode('utf-8')
    return None
# =============================================================================
# High-level Cache Operations
# =============================================================================
def cache_store(cache_id: str, content: bytes) -> Tuple[str, Path]:
    """Store *content* and point *cache_id* at it (IPNS → CID indirection).

    Args:
        cache_id: Computation hash (IPNS address).
        content: Bytes to store.

    Returns:
        (cid, path) for the stored content.
    """
    digest = compute_cid(content)
    stored_at = content_store_by_cid(digest, content)
    ref_set(cache_id, digest)
    return digest, stored_at
def cache_store_file(cache_id: str, file_path: Path) -> Tuple[str, Path]:
    """Store a file's bytes with IPNS → CID indirection."""
    return cache_store(cache_id, file_path.read_bytes())
def cache_store_string(cache_id: str, text: str) -> Tuple[str, Path]:
    """Store a UTF-8 string with IPNS → CID indirection."""
    encoded = text.encode('utf-8')
    return cache_store(cache_id, encoded)
def cache_store_json(cache_id: str, data: Any) -> Tuple[str, Path]:
    """Serialize *data* as pretty-printed JSON and store it."""
    return cache_store_string(cache_id, json.dumps(data, indent=2))
def cache_exists(cache_id: str) -> Optional[Path]:
    """Return the content path for a computation hash, or None on miss."""
    cid = ref_exists(cache_id)
    return content_exists_by_cid(cid) if cid else None
def cache_get(cache_id: str) -> Optional[bytes]:
    """Get cached content by computation hash."""
    # Thin wrapper: resolves cache_id -> CID via the ref, then reads the blob.
    return ref_get_content(cache_id)
def cache_get_string(cache_id: str) -> Optional[str]:
    """Get cached string by computation hash."""
    # Thin wrapper: cache_id -> CID -> UTF-8 decoded text (None on miss).
    return ref_get_string(cache_id)
def cache_get_json(cache_id: str) -> Optional[Any]:
    """Fetch and parse cached JSON for a computation hash, or None on miss."""
    text = cache_get_string(cache_id)
    if not text:
        return None
    return json.loads(text)
def cache_get_path(cache_id: str) -> Optional[Path]:
    """Get path to cached content by computation hash.

    Performs the same ref → content lookup as cache_exists(); the
    original duplicated that body verbatim, so delegate instead.
    The separate name survives because callers use it for the path
    rather than as an existence test.
    """
    return cache_exists(cache_id)
# =============================================================================
# Plan Cache (convenience wrappers)
# =============================================================================
def _stable_hash_params(params: Dict[str, Any]) -> str:
    """Deterministic SHA-256 of a params dict (keys sorted, values stringified).

    Uses the same hex-digest form as content CIDs so all cache keys
    look alike.
    """
    canonical = json.dumps(params, sort_keys=True, default=str)
    return hashlib.sha256(canonical.encode()).hexdigest()
def plan_cache_id(source_cid: str, params: Optional[Dict[str, Any]] = None) -> str:
    """Derive the cache_id (IPNS address) for a plan.

    The id depends only on the source CID plus a stable hash of the
    params (when non-empty); plan name/version are metadata and never
    affect it.
    """
    key = f"plan:{source_cid}"
    if params:
        key += f":{_stable_hash_params(params)}"
    return hashlib.sha256(key.encode()).hexdigest()
def plan_exists(source_cid: str, params: Optional[Dict[str, Any]] = None) -> Optional[str]:
    """Return the result CID for a cached plan, or None if absent."""
    return ref_exists(plan_cache_id(source_cid, params))
def plan_store(source_cid: str, params: Dict[str, Any], content: str) -> Tuple[str, str, Path]:
    """Cache a plan's content under its derived cache_id.

    Returns:
        (cache_id, cid, path) for the stored plan.
    """
    cache_id = plan_cache_id(source_cid, params)
    cid, stored_at = cache_store_string(cache_id, content)
    return cache_id, cid, stored_at
def plan_load(source_cid: str, params: Optional[Dict[str, Any]] = None) -> Optional[str]:
    """Load a cached plan's content string, or None on miss."""
    return cache_get_string(plan_cache_id(source_cid, params))
def plan_get_path(source_cid: str, params: Optional[Dict[str, Any]] = None) -> Optional[Path]:
    """Return the filesystem path of a cached plan, or None on miss."""
    return cache_get_path(plan_cache_id(source_cid, params))
# =============================================================================
# Cache Listing
# =============================================================================
def list_cache(verbose: bool = False) -> Dict[str, Any]:
    """Collect a structured listing of every ref and content blob in the cache.

    Args:
        verbose: Unused here; accepted for signature symmetry with
            print_cache_listing() — TODO confirm and either use or drop.

    Returns:
        Dict with keys "refs" (list of ref-info dicts), "content"
        (list of blob-info dicts) and "summary" (counts + total size).
    """
    from datetime import datetime
    # Calling the getters also creates the directories if missing;
    # cache_dir itself is otherwise unused in this function.
    cache_dir = get_cache_dir()
    refs_dir = get_refs_dir()
    content_dir = get_content_dir()

    def format_size(size):
        # Human-readable size using decimal (SI) units.
        if size >= 1_000_000_000:
            return f"{size / 1_000_000_000:.1f}GB"
        elif size >= 1_000_000:
            return f"{size / 1_000_000:.1f}MB"
        elif size >= 1000:
            return f"{size / 1000:.1f}KB"
        else:
            return f"{size}B"

    def get_file_info(path: Path) -> Dict:
        # Stat-derived metadata shared by both ref and content entries.
        stat = path.stat()
        return {
            "path": path,
            "name": path.name,
            "size": stat.st_size,
            "size_str": format_size(stat.st_size),
            "mtime": datetime.fromtimestamp(stat.st_mtime),
        }

    result = {
        "refs": [],
        "content": [],
        "summary": {"total_items": 0, "total_size": 0},
    }
    # Refs: one flat file per cache_id, whose text is the target CID.
    if refs_dir.exists():
        for f in sorted(refs_dir.iterdir()):
            if f.is_file():
                info = get_file_info(f)
                info["cache_id"] = f.name
                info["cid"] = f.read_text().strip()
                # Try to determine type from content
                cid = info["cid"]
                content_path = content_exists_by_cid(cid)
                if content_path:
                    # Size of the blob the ref points at (may be absent).
                    info["content_size"] = content_path.stat().st_size
                    info["content_size_str"] = format_size(info["content_size"])
                result["refs"].append(info)
    # Content: blobs live one level down in 2-hex-char shard directories.
    if content_dir.exists():
        for subdir in sorted(content_dir.iterdir()):
            if subdir.is_dir():
                for f in sorted(subdir.iterdir()):
                    if f.is_file():
                        info = get_file_info(f)
                        info["cid"] = f.name
                        result["content"].append(info)
    # Summary: totals computed over the collected lists.
    result["summary"]["total_refs"] = len(result["refs"])
    result["summary"]["total_content"] = len(result["content"])
    result["summary"]["total_size"] = sum(i["size"] for i in result["content"])
    result["summary"]["total_size_str"] = format_size(result["summary"]["total_size"])
    return result
def print_cache_listing(verbose: bool = False):
    """Print a human-readable cache listing to stdout.

    Args:
        verbose: When True, also prints the shell command to clear the cache.
    """
    info = list_cache(verbose)
    cache_dir = get_cache_dir()
    print(f"\nCache directory: {cache_dir}\n")
    # Refs summary — capped at the first 20 entries.
    if info["refs"]:
        print(f"=== Refs ({len(info['refs'])}) ===")
        for ref in info["refs"][:20]:  # Show first 20
            # content_size_str is only present when the target blob exists.
            content_info = f"{ref.get('content_size_str', '?')}" if 'content_size_str' in ref else ""
            print(f" {ref['cache_id'][:16]}... → {ref['cid'][:16]}...{content_info}")
        if len(info["refs"]) > 20:
            print(f" ... and {len(info['refs']) - 20} more")
        print()
    # Content by type
    if info["content"]:
        # Group by first 2 chars (subdirectory)
        print(f"=== Content ({len(info['content'])} items, {info['summary']['total_size_str']}) ===")
        for item in info["content"][:20]:
            print(f" {item['cid'][:16]}... {item['size_str']:>8} {item['mtime'].strftime('%Y-%m-%d %H:%M')}")
        if len(info["content"]) > 20:
            print(f" ... and {len(info['content']) - 20} more")
        print()
    print(f"=== Summary ===")
    print(f" Refs: {info['summary']['total_refs']}")
    print(f" Content: {info['summary']['total_content']} ({info['summary']['total_size_str']})")
    if verbose:
        print(f"\nTo clear cache: rm -rf {cache_dir}/*")
if __name__ == "__main__":
    import sys
    # -v / --verbose enables the extra hint about clearing the cache.
    verbose = any(flag in sys.argv for flag in ("-v", "--verbose"))
    print_cache_listing(verbose)