Add streaming video compositor with sexp interpreter

- New streaming/ module for real-time video processing:
  - compositor.py: Main streaming compositor with cycle-crossfade
  - sexp_executor.py: Executes compiled sexp recipes in real-time
  - sexp_interp.py: Full S-expression interpreter for SLICE_ON Lambda
  - recipe_adapter.py: Bridges recipes to streaming compositor
  - sources.py: Video source with ffmpeg streaming
  - audio.py: Real-time audio analysis (energy, beats)
  - output.py: Preview (mpv) and file output with audio muxing

- New templates/:
  - cycle-crossfade.sexp: Smooth zoom-based video cycling
  - process-pair.sexp: Dual-clip processing with effects

- Key features:
  - Videos cycle in input-videos order (not definition order)
  - Cumulative whole-spin rotation
  - Zero-weight sources skip processing
  - Live audio-reactive effects

- New effects: blend_multi for weighted layer compositing
- Updated primitives and interpreter for streaming compatibility

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-29 01:27:39 +00:00
parent 17e3e23f06
commit d241e2a663
31 changed files with 5143 additions and 96 deletions

404
cache.py Normal file
View File

@@ -0,0 +1,404 @@
#!/usr/bin/env python3
"""
Unified content cache for artdag.
Design:
- IPNS (cache_id) = computation hash, known BEFORE execution
"What would be the result of running X with inputs Y?"
- CID = content hash, known AFTER execution
"What is this actual content?"
Structure:
.cache/
refs/ # IPNS → CID mappings (computation → result)
{cache_id} # Text file containing the CID of the result
content/ # Content-addressed storage
{cid[:2]}/{cid} # Actual content by CID
"""
import hashlib
import json
import os
from pathlib import Path
from typing import Optional, Dict, Any, Tuple
# Default cache location - can be overridden via ARTDAG_CACHE env var
DEFAULT_CACHE_DIR = Path(__file__).parent / ".cache"


def get_cache_dir() -> Path:
    """Return the cache root directory, creating it on first use.

    The ARTDAG_CACHE environment variable overrides the default
    location next to this module.
    """
    root = Path(os.environ.get("ARTDAG_CACHE", DEFAULT_CACHE_DIR))
    root.mkdir(parents=True, exist_ok=True)
    return root
def get_refs_dir() -> Path:
    """Return (and create) the refs/ directory of IPNS → CID mappings."""
    path = get_cache_dir() / "refs"
    path.mkdir(parents=True, exist_ok=True)
    return path
def get_content_dir() -> Path:
    """Return (and create) the content/ directory of CID-addressed blobs."""
    path = get_cache_dir() / "content"
    path.mkdir(parents=True, exist_ok=True)
    return path
# =============================================================================
# CID (Content Hash) Operations
# =============================================================================
def compute_cid(content: bytes) -> str:
    """Return the hex SHA-256 digest of *content*, used as its content ID."""
    digest = hashlib.sha256()
    digest.update(content)
    return digest.hexdigest()
def compute_file_cid(file_path: Path) -> str:
    """Compute the content ID (hex SHA-256) of a file.

    Hashes in fixed-size chunks so large media files are digested
    without loading the whole file into memory (the original read
    the entire file into one bytes object).
    """
    hasher = hashlib.sha256()
    with open(file_path, 'rb') as f:
        # 1 MiB chunks; iter() stops at the b'' sentinel on EOF.
        for chunk in iter(lambda: f.read(1 << 20), b''):
            hasher.update(chunk)
    return hasher.hexdigest()
def compute_string_cid(text: str) -> str:
    """Content ID for a string (UTF-8 bytes hashed with SHA-256)."""
    encoded = text.encode('utf-8')
    return compute_cid(encoded)
# =============================================================================
# Content Storage (by CID)
# =============================================================================
def _content_path(cid: str) -> Path:
    """Path under content/ for *cid*, sharded by its first two hex chars."""
    shard = cid[:2]
    return get_content_dir() / shard / cid
def content_exists_by_cid(cid: str) -> Optional[Path]:
    """Return the stored path for *cid*, or None if absent or zero-length."""
    candidate = _content_path(cid)
    if not candidate.exists():
        return None
    # A zero-byte file is treated as a miss (likely a failed write).
    return candidate if candidate.stat().st_size > 0 else None
def content_store_by_cid(cid: str, content: bytes) -> Path:
    """Store *content* at its content-addressed path and return that path.

    Writes to a temporary file in the target directory and atomically
    renames it into place with os.replace, so a crash or concurrent
    reader never observes a partially written blob (the original
    wrote directly to the final path).
    """
    path = _content_path(cid)
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_name(path.name + ".tmp")
    tmp.write_bytes(content)
    os.replace(tmp, path)  # atomic on POSIX and Windows
    return path
def content_store_file(file_path: Path) -> Tuple[str, Path]:
    """Store a file's bytes under their content hash. Returns (cid, path)."""
    data = file_path.read_bytes()
    digest = compute_cid(data)
    return digest, content_store_by_cid(digest, data)
def content_store_string(text: str) -> Tuple[str, Path]:
    """Store a UTF-8 string under its content hash. Returns (cid, path)."""
    data = text.encode('utf-8')
    digest = compute_cid(data)
    return digest, content_store_by_cid(digest, data)
def content_get(cid: str) -> Optional[bytes]:
    """Return the stored bytes for *cid*, or None when not cached."""
    path = content_exists_by_cid(cid)
    return path.read_bytes() if path else None
def content_get_string(cid: str) -> Optional[str]:
    """Return the stored text for *cid*, or None when not cached.

    Uses an explicit None check instead of truthiness so that empty
    content (should it ever be stored) decodes to "" rather than
    being misreported as a cache miss.
    """
    content = content_get(cid)
    if content is not None:
        return content.decode('utf-8')
    return None
# =============================================================================
# Refs (IPNS → CID mappings)
# =============================================================================
def _ref_path(cache_id: str) -> Path:
    """Location of the ref file for *cache_id* (flat layout, no sharding)."""
    return get_refs_dir() / cache_id
def ref_exists(cache_id: str) -> Optional[str]:
    """Return the CID a ref points to, or None when no ref is recorded."""
    ref = _ref_path(cache_id)
    return ref.read_text().strip() if ref.exists() else None
def ref_set(cache_id: str, cid: str) -> Path:
    """Record the IPNS → CID mapping for *cache_id*; returns the ref path."""
    ref = _ref_path(cache_id)
    ref.parent.mkdir(parents=True, exist_ok=True)
    ref.write_text(cid)
    return ref
def ref_get_content(cache_id: str) -> Optional[bytes]:
    """Resolve *cache_id* to its CID, then fetch the content bytes."""
    cid = ref_exists(cache_id)
    return content_get(cid) if cid else None
def ref_get_string(cache_id: str) -> Optional[str]:
    """Return cached text for *cache_id*, or None on miss.

    Uses an explicit None check instead of truthiness so that empty
    cached content decodes to "" instead of being reported as a miss.
    """
    content = ref_get_content(cache_id)
    if content is not None:
        return content.decode('utf-8')
    return None
# =============================================================================
# High-level Cache Operations
# =============================================================================
def cache_store(cache_id: str, content: bytes) -> Tuple[str, Path]:
    """Store *content* and point *cache_id* at it (IPNS → CID indirection).

    Args:
        cache_id: Computation hash (IPNS address).
        content: Bytes to store.

    Returns:
        (cid, path) for the stored content.
    """
    digest = compute_cid(content)
    stored_at = content_store_by_cid(digest, content)
    ref_set(cache_id, digest)
    return digest, stored_at
def cache_store_file(cache_id: str, file_path: Path) -> Tuple[str, Path]:
    """Store a file's bytes with IPNS → CID indirection."""
    return cache_store(cache_id, file_path.read_bytes())
def cache_store_string(cache_id: str, text: str) -> Tuple[str, Path]:
    """Store a UTF-8 string with IPNS → CID indirection."""
    encoded = text.encode('utf-8')
    return cache_store(cache_id, encoded)
def cache_store_json(cache_id: str, data: Any) -> Tuple[str, Path]:
    """Serialize *data* as pretty-printed JSON and store it."""
    return cache_store_string(cache_id, json.dumps(data, indent=2))
def cache_exists(cache_id: str) -> Optional[Path]:
    """Return the content path for a computation hash, or None on miss."""
    cid = ref_exists(cache_id)
    return content_exists_by_cid(cid) if cid else None
def cache_get(cache_id: str) -> Optional[bytes]:
    """Get cached content by computation hash."""
    # Thin wrapper: resolves cache_id -> CID via the ref, then reads the blob.
    return ref_get_content(cache_id)
def cache_get_string(cache_id: str) -> Optional[str]:
    """Get cached string by computation hash."""
    # Thin wrapper: cache_id -> CID -> UTF-8 decoded text (None on miss).
    return ref_get_string(cache_id)
def cache_get_json(cache_id: str) -> Optional[Any]:
    """Fetch and parse cached JSON for a computation hash, or None on miss."""
    text = cache_get_string(cache_id)
    if not text:
        return None
    return json.loads(text)
def cache_get_path(cache_id: str) -> Optional[Path]:
    """Get path to cached content by computation hash.

    Performs the same ref → content lookup as cache_exists(); the
    original duplicated that body verbatim, so delegate instead.
    The separate name survives because callers use it for the path
    rather than as an existence test.
    """
    return cache_exists(cache_id)
# =============================================================================
# Plan Cache (convenience wrappers)
# =============================================================================
def _stable_hash_params(params: Dict[str, Any]) -> str:
    """Deterministic SHA-256 of a params dict (keys sorted, values stringified).

    Uses the same hex-digest form as content CIDs so all cache keys
    look alike.
    """
    canonical = json.dumps(params, sort_keys=True, default=str)
    return hashlib.sha256(canonical.encode()).hexdigest()
def plan_cache_id(source_cid: str, params: Optional[Dict[str, Any]] = None) -> str:
    """Derive the cache_id (IPNS address) for a plan.

    The id depends only on the source CID plus a stable hash of the
    params (when non-empty); plan name/version are metadata and never
    affect it.
    """
    key = f"plan:{source_cid}"
    if params:
        key += f":{_stable_hash_params(params)}"
    return hashlib.sha256(key.encode()).hexdigest()
def plan_exists(source_cid: str, params: Optional[Dict[str, Any]] = None) -> Optional[str]:
    """Return the result CID for a cached plan, or None if absent."""
    return ref_exists(plan_cache_id(source_cid, params))
def plan_store(source_cid: str, params: Dict[str, Any], content: str) -> Tuple[str, str, Path]:
    """Cache a plan's content under its derived cache_id.

    Returns:
        (cache_id, cid, path) for the stored plan.
    """
    cache_id = plan_cache_id(source_cid, params)
    cid, stored_at = cache_store_string(cache_id, content)
    return cache_id, cid, stored_at
def plan_load(source_cid: str, params: Optional[Dict[str, Any]] = None) -> Optional[str]:
    """Load a cached plan's content string, or None on miss."""
    return cache_get_string(plan_cache_id(source_cid, params))
def plan_get_path(source_cid: str, params: Optional[Dict[str, Any]] = None) -> Optional[Path]:
    """Return the filesystem path of a cached plan, or None on miss."""
    return cache_get_path(plan_cache_id(source_cid, params))
# =============================================================================
# Cache Listing
# =============================================================================
def list_cache(verbose: bool = False) -> Dict[str, Any]:
    """Collect a structured listing of every ref and content blob in the cache.

    Args:
        verbose: Unused here; accepted for signature symmetry with
            print_cache_listing() — TODO confirm and either use or drop.

    Returns:
        Dict with keys "refs" (list of ref-info dicts), "content"
        (list of blob-info dicts) and "summary" (counts + total size).
    """
    from datetime import datetime
    # Calling the getters also creates the directories if missing;
    # cache_dir itself is otherwise unused in this function.
    cache_dir = get_cache_dir()
    refs_dir = get_refs_dir()
    content_dir = get_content_dir()

    def format_size(size):
        # Human-readable size using decimal (SI) units.
        if size >= 1_000_000_000:
            return f"{size / 1_000_000_000:.1f}GB"
        elif size >= 1_000_000:
            return f"{size / 1_000_000:.1f}MB"
        elif size >= 1000:
            return f"{size / 1000:.1f}KB"
        else:
            return f"{size}B"

    def get_file_info(path: Path) -> Dict:
        # Stat-derived metadata shared by both ref and content entries.
        stat = path.stat()
        return {
            "path": path,
            "name": path.name,
            "size": stat.st_size,
            "size_str": format_size(stat.st_size),
            "mtime": datetime.fromtimestamp(stat.st_mtime),
        }

    result = {
        "refs": [],
        "content": [],
        "summary": {"total_items": 0, "total_size": 0},
    }
    # Refs: one flat file per cache_id, whose text is the target CID.
    if refs_dir.exists():
        for f in sorted(refs_dir.iterdir()):
            if f.is_file():
                info = get_file_info(f)
                info["cache_id"] = f.name
                info["cid"] = f.read_text().strip()
                # Try to determine type from content
                cid = info["cid"]
                content_path = content_exists_by_cid(cid)
                if content_path:
                    # Size of the blob the ref points at (may be absent).
                    info["content_size"] = content_path.stat().st_size
                    info["content_size_str"] = format_size(info["content_size"])
                result["refs"].append(info)
    # Content: blobs live one level down in 2-hex-char shard directories.
    if content_dir.exists():
        for subdir in sorted(content_dir.iterdir()):
            if subdir.is_dir():
                for f in sorted(subdir.iterdir()):
                    if f.is_file():
                        info = get_file_info(f)
                        info["cid"] = f.name
                        result["content"].append(info)
    # Summary: totals computed over the collected lists.
    result["summary"]["total_refs"] = len(result["refs"])
    result["summary"]["total_content"] = len(result["content"])
    result["summary"]["total_size"] = sum(i["size"] for i in result["content"])
    result["summary"]["total_size_str"] = format_size(result["summary"]["total_size"])
    return result
def print_cache_listing(verbose: bool = False):
    """Print a human-readable cache listing to stdout.

    Args:
        verbose: When True, also prints the shell command to clear the cache.
    """
    info = list_cache(verbose)
    cache_dir = get_cache_dir()
    print(f"\nCache directory: {cache_dir}\n")
    # Refs summary — capped at the first 20 entries.
    if info["refs"]:
        print(f"=== Refs ({len(info['refs'])}) ===")
        for ref in info["refs"][:20]:  # Show first 20
            # content_size_str is only present when the target blob exists.
            content_info = f"{ref.get('content_size_str', '?')}" if 'content_size_str' in ref else ""
            print(f" {ref['cache_id'][:16]}... → {ref['cid'][:16]}...{content_info}")
        if len(info["refs"]) > 20:
            print(f" ... and {len(info['refs']) - 20} more")
        print()
    # Content by type
    if info["content"]:
        # Group by first 2 chars (subdirectory)
        print(f"=== Content ({len(info['content'])} items, {info['summary']['total_size_str']}) ===")
        for item in info["content"][:20]:
            print(f" {item['cid'][:16]}... {item['size_str']:>8} {item['mtime'].strftime('%Y-%m-%d %H:%M')}")
        if len(info["content"]) > 20:
            print(f" ... and {len(info['content']) - 20} more")
        print()
    print(f"=== Summary ===")
    print(f" Refs: {info['summary']['total_refs']}")
    print(f" Content: {info['summary']['total_content']} ({info['summary']['total_size_str']})")
    if verbose:
        print(f"\nTo clear cache: rm -rf {cache_dir}/*")
if __name__ == "__main__":
    import sys
    # -v / --verbose enables the extra hint about clearing the cache.
    verbose = any(flag in sys.argv for flag in ("-v", "--verbose"))
    print_cache_listing(verbose)