- New streaming/ module for real-time video processing: - compositor.py: Main streaming compositor with cycle-crossfade - sexp_executor.py: Executes compiled sexp recipes in real-time - sexp_interp.py: Full S-expression interpreter for SLICE_ON Lambda - recipe_adapter.py: Bridges recipes to streaming compositor - sources.py: Video source with ffmpeg streaming - audio.py: Real-time audio analysis (energy, beats) - output.py: Preview (mpv) and file output with audio muxing - New templates/: - cycle-crossfade.sexp: Smooth zoom-based video cycling - process-pair.sexp: Dual-clip processing with effects - Key features: - Videos cycle in input-videos order (not definition order) - Cumulative whole-spin rotation - Zero-weight sources skip processing - Live audio-reactive effects - New effects: blend_multi for weighted layer compositing - Updated primitives and interpreter for streaming compatibility Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
405 lines
12 KiB
Python
405 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Unified content cache for artdag.
|
|
|
|
Design:
|
|
- IPNS (cache_id) = computation hash, known BEFORE execution
|
|
"What would be the result of running X with inputs Y?"
|
|
|
|
- CID = content hash, known AFTER execution
|
|
"What is this actual content?"
|
|
|
|
Structure:
|
|
.cache/
|
|
refs/ # IPNS → CID mappings (computation → result)
|
|
{cache_id} # Text file containing the CID of the result
|
|
content/ # Content-addressed storage
|
|
{cid[:2]}/{cid} # Actual content by CID
|
|
"""
|
|
|
|
import hashlib
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Optional, Dict, Any, Tuple
|
|
|
|
# Default cache location - can be overridden via ARTDAG_CACHE env var
DEFAULT_CACHE_DIR = Path(__file__).parent / ".cache"


def get_cache_dir() -> Path:
    """Return the cache root directory, creating it on first use.

    The ARTDAG_CACHE environment variable overrides the default
    location next to this module.
    """
    override = os.environ.get("ARTDAG_CACHE")
    cache_dir = Path(override) if override is not None else DEFAULT_CACHE_DIR
    cache_dir.mkdir(parents=True, exist_ok=True)
    return cache_dir
|
|
|
|
|
|
def get_refs_dir() -> Path:
    """Return the refs directory (IPNS → CID mappings), creating it if missing."""
    path = get_cache_dir() / "refs"
    path.mkdir(parents=True, exist_ok=True)
    return path
|
|
|
|
|
|
def get_content_dir() -> Path:
    """Return the content-addressed storage directory, creating it if missing."""
    path = get_cache_dir() / "content"
    path.mkdir(parents=True, exist_ok=True)
    return path
|
|
|
|
|
|
# =============================================================================
|
|
# CID (Content Hash) Operations
|
|
# =============================================================================
|
|
|
|
# Read size used when hashing files incrementally.
_HASH_CHUNK_SIZE = 1 << 20  # 1 MiB


def compute_cid(content: bytes) -> str:
    """Compute the content ID (hex SHA256 digest) for raw bytes."""
    return hashlib.sha256(content).hexdigest()


def compute_file_cid(file_path: Path) -> str:
    """Compute the content ID for a file.

    Hashes the file in fixed-size chunks so arbitrarily large files can
    be fingerprinted without loading them fully into memory; the result
    is identical to compute_cid(file_bytes).
    """
    digest = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(_HASH_CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()


def compute_string_cid(text: str) -> str:
    """Compute the content ID for a string (UTF-8 encoded)."""
    return compute_cid(text.encode('utf-8'))
|
|
|
|
|
|
# =============================================================================
|
|
# Content Storage (by CID)
|
|
# =============================================================================
|
|
|
|
def _content_path(cid: str) -> Path:
    """Path where content with this CID lives (sharded by first two hex chars)."""
    shard = cid[:2]
    return get_content_dir() / shard / cid
|
|
|
|
|
|
def content_exists_by_cid(cid: str) -> Optional[Path]:
    """Return the path to the content for *cid*, or None if absent."""
    candidate = _content_path(cid)
    if not candidate.exists():
        return None
    if candidate.stat().st_size == 0:
        # Zero-byte entries read as absent — presumably a guard against
        # interrupted writes; confirm before ever caching empty content.
        return None
    return candidate
|
|
|
|
|
|
def content_store_by_cid(cid: str, content: bytes) -> Path:
    """Store content under its CID and return the resulting path.

    Writes to a temporary sibling file first, then moves it into place
    with os.replace(), so a reader never observes a partially written
    entry (a truncated/zero-byte file would otherwise look cached).
    """
    path = _content_path(cid)
    path.parent.mkdir(parents=True, exist_ok=True)
    # PID-suffixed temp name so concurrent writers don't trample each
    # other's temp file; both would write identical bytes anyway, since
    # storage is content-addressed.
    tmp = path.with_name(f"{path.name}.{os.getpid()}.tmp")
    tmp.write_bytes(content)
    os.replace(tmp, path)
    return path
|
|
|
|
|
|
def content_store_file(file_path: Path) -> Tuple[str, Path]:
    """Store a file's bytes by content hash. Returns (cid, path)."""
    data = file_path.read_bytes()
    digest = compute_cid(data)
    return digest, content_store_by_cid(digest, data)
|
|
|
|
|
|
def content_store_string(text: str) -> Tuple[str, Path]:
    """Store a string (UTF-8 encoded) by content hash. Returns (cid, path)."""
    encoded = text.encode('utf-8')
    digest = compute_cid(encoded)
    return digest, content_store_by_cid(digest, encoded)
|
|
|
|
|
|
def content_get(cid: str) -> Optional[bytes]:
    """Return the bytes stored for *cid*, or None when not cached."""
    stored = content_exists_by_cid(cid)
    return stored.read_bytes() if stored is not None else None
|
|
|
|
|
|
def content_get_string(cid: str) -> Optional[str]:
    """Return the string stored for *cid*, or None when not cached.

    Checks `is not None` rather than truthiness so that empty content
    (should it ever be stored) decodes to "" instead of masquerading
    as a cache miss.
    """
    content = content_get(cid)
    if content is None:
        return None
    return content.decode('utf-8')
|
|
|
|
|
|
# =============================================================================
|
|
# Refs (IPNS → CID mappings)
|
|
# =============================================================================
|
|
|
|
def _ref_path(cache_id: str) -> Path:
    """Path of the ref file recording the CID for this cache_id."""
    return get_refs_dir().joinpath(cache_id)
|
|
|
|
|
|
def ref_exists(cache_id: str) -> Optional[str]:
    """Return the CID recorded for *cache_id*, or None if no ref exists."""
    ref = _ref_path(cache_id)
    if not ref.exists():
        return None
    return ref.read_text().strip()
|
|
|
|
|
|
def ref_set(cache_id: str, cid: str) -> Path:
    """Record the cache_id → CID mapping; returns the ref file path.

    Written via a temporary file + os.replace() so a concurrent reader
    never sees a truncated CID.
    """
    path = _ref_path(cache_id)
    path.parent.mkdir(parents=True, exist_ok=True)
    # PID-suffixed temp name avoids collisions between concurrent writers.
    tmp = path.with_name(f"{path.name}.{os.getpid()}.tmp")
    tmp.write_text(cid)
    os.replace(tmp, path)
    return path
|
|
|
|
|
|
def ref_get_content(cache_id: str) -> Optional[bytes]:
    """Resolve *cache_id* through its ref and return the content bytes, or None."""
    cid = ref_exists(cache_id)
    return content_get(cid) if cid else None
|
|
|
|
|
|
def ref_get_string(cache_id: str) -> Optional[str]:
    """Resolve *cache_id* and return the cached content as a UTF-8 string.

    Uses an `is not None` check so empty cached content (if it ever
    occurs) round-trips as "" rather than reading as a cache miss.
    """
    content = ref_get_content(cache_id)
    if content is None:
        return None
    return content.decode('utf-8')
|
|
|
|
|
|
# =============================================================================
|
|
# High-level Cache Operations
|
|
# =============================================================================
|
|
|
|
def cache_store(cache_id: str, content: bytes) -> Tuple[str, Path]:
    """
    Store content with IPNS→CID indirection.

    The bytes land in content-addressed storage under their CID, and a
    ref is written so the computation hash resolves to them.

    Args:
        cache_id: Computation hash (IPNS address)
        content: Content to store

    Returns:
        (cid, path) tuple
    """
    cid = compute_cid(content)
    stored_at = content_store_by_cid(cid, content)
    ref_set(cache_id, cid)
    return cid, stored_at
|
|
|
|
|
|
def cache_store_file(cache_id: str, file_path: Path) -> Tuple[str, Path]:
    """Store a file's bytes with IPNS→CID indirection."""
    return cache_store(cache_id, file_path.read_bytes())
|
|
|
|
|
|
def cache_store_string(cache_id: str, text: str) -> Tuple[str, Path]:
    """Store a string (UTF-8 encoded) with IPNS→CID indirection."""
    encoded = text.encode('utf-8')
    return cache_store(cache_id, encoded)
|
|
|
|
|
|
def cache_store_json(cache_id: str, data: Any) -> Tuple[str, Path]:
    """Store JSON-serializable data with IPNS→CID indirection."""
    serialized = json.dumps(data, indent=2)
    return cache_store_string(cache_id, serialized)
|
|
|
|
|
|
def cache_exists(cache_id: str) -> Optional[Path]:
    """Return the content path for a cached computation, or None."""
    cid = ref_exists(cache_id)
    return content_exists_by_cid(cid) if cid else None
|
|
|
|
|
|
def cache_get(cache_id: str) -> Optional[bytes]:
    """Get cached content by computation hash."""
    # Thin alias: ref lookup + content fetch in one step.
    return ref_get_content(cache_id)
|
|
|
|
|
|
def cache_get_string(cache_id: str) -> Optional[str]:
    """Get cached string by computation hash."""
    # Thin alias: ref lookup + UTF-8 decode in one step.
    return ref_get_string(cache_id)
|
|
|
|
|
|
def cache_get_json(cache_id: str) -> Optional[Any]:
    """Get cached JSON by computation hash.

    Returns None on a cache miss. NOTE(review): a cached JSON value of
    null is indistinguishable from a miss here (json.loads("null") is
    None), and empty/falsy cached text is also treated as a miss by the
    truthiness check — confirm callers never cache such values.
    """
    text = cache_get_string(cache_id)
    if text:
        return json.loads(text)
    return None
|
|
|
|
|
|
def cache_get_path(cache_id: str) -> Optional[Path]:
    """Get path to cached content by computation hash.

    Identical semantics to cache_exists(); delegated so the ref→content
    lookup logic lives in one place instead of being duplicated.
    """
    return cache_exists(cache_id)
|
|
|
|
|
|
# =============================================================================
|
|
# Plan Cache (convenience wrappers)
|
|
# =============================================================================
|
|
|
|
def _stable_hash_params(params: Dict[str, Any]) -> str:
    """Stable SHA256 of params via canonical (key-sorted) JSON.

    default=str keeps non-JSON values hashable; their repr must itself
    be stable for the hash to be stable.
    """
    canonical = json.dumps(params, sort_keys=True, default=str)
    return hashlib.sha256(canonical.encode()).hexdigest()


def plan_cache_id(source_cid: str, params: Dict[str, Any] = None) -> str:
    """
    Compute the cache_id (IPNS address) for a plan.

    Keyed on source CID plus (if non-empty) a stable hash of params;
    name/version are just metadata. None and {} hash identically.
    """
    if params:
        key = f"plan:{source_cid}:{_stable_hash_params(params)}"
    else:
        key = f"plan:{source_cid}"
    return hashlib.sha256(key.encode()).hexdigest()
|
|
|
|
|
|
def plan_exists(source_cid: str, params: Dict[str, Any] = None) -> Optional[str]:
    """Return the result CID for a cached plan, or None if absent."""
    return ref_exists(plan_cache_id(source_cid, params))
|
|
|
|
|
|
def plan_store(source_cid: str, params: Dict[str, Any], content: str) -> Tuple[str, str, Path]:
    """
    Store a plan in the cache.

    Returns:
        (cache_id, cid, path) tuple
    """
    cache_id = plan_cache_id(source_cid, params)
    stored_cid, stored_path = cache_store_string(cache_id, content)
    return cache_id, stored_cid, stored_path
|
|
|
|
|
|
def plan_load(source_cid: str, params: Dict[str, Any] = None) -> Optional[str]:
    """Load a cached plan's content string, or None on a miss."""
    return cache_get_string(plan_cache_id(source_cid, params))
|
|
|
|
|
|
def plan_get_path(source_cid: str, params: Dict[str, Any] = None) -> Optional[Path]:
    """Return the path of a cached plan's content, or None on a miss."""
    return cache_get_path(plan_cache_id(source_cid, params))
|
|
|
|
|
|
# =============================================================================
|
|
# Cache Listing
|
|
# =============================================================================
|
|
|
|
def list_cache(verbose: bool = False) -> Dict[str, Any]:
    """List all cached items.

    Walks the refs/ and content/ trees and returns a dict with keys:
    "refs" (one entry per ref file, including the CID it points to and,
    when present, the referenced content's size), "content" (one entry
    per stored blob), and "summary" (counts and total content size).

    NOTE(review): `verbose` is currently unused here; only
    print_cache_listing() acts on it.
    """
    from datetime import datetime

    # NOTE(review): cache_dir is not read below — kept, presumably, for
    # get_cache_dir()'s mkdir side effect; confirm before removing.
    cache_dir = get_cache_dir()
    refs_dir = get_refs_dir()
    content_dir = get_content_dir()

    def format_size(size):
        # Human-readable size using decimal (SI) thresholds.
        if size >= 1_000_000_000:
            return f"{size / 1_000_000_000:.1f}GB"
        elif size >= 1_000_000:
            return f"{size / 1_000_000:.1f}MB"
        elif size >= 1000:
            return f"{size / 1000:.1f}KB"
        else:
            return f"{size}B"

    def get_file_info(path: Path) -> Dict:
        # Per-file metadata shared by ref and content entries.
        stat = path.stat()
        return {
            "path": path,
            "name": path.name,
            "size": stat.st_size,
            "size_str": format_size(stat.st_size),
            "mtime": datetime.fromtimestamp(stat.st_mtime),
        }

    result = {
        "refs": [],
        "content": [],
        "summary": {"total_items": 0, "total_size": 0},
    }

    # Refs
    if refs_dir.exists():
        for f in sorted(refs_dir.iterdir()):
            if f.is_file():
                info = get_file_info(f)
                # Ref filename is the cache_id; file body is the CID.
                info["cache_id"] = f.name
                info["cid"] = f.read_text().strip()
                # Try to determine type from content
                cid = info["cid"]
                content_path = content_exists_by_cid(cid)
                if content_path:
                    # Size of the blob the ref points to (when it exists).
                    info["content_size"] = content_path.stat().st_size
                    info["content_size_str"] = format_size(info["content_size"])
                result["refs"].append(info)

    # Content
    if content_dir.exists():
        # Blobs are sharded into two-character subdirectories by CID prefix.
        for subdir in sorted(content_dir.iterdir()):
            if subdir.is_dir():
                for f in sorted(subdir.iterdir()):
                    if f.is_file():
                        info = get_file_info(f)
                        info["cid"] = f.name
                        result["content"].append(info)

    # Summary
    result["summary"]["total_refs"] = len(result["refs"])
    result["summary"]["total_content"] = len(result["content"])
    result["summary"]["total_size"] = sum(i["size"] for i in result["content"])
    result["summary"]["total_size_str"] = format_size(result["summary"]["total_size"])

    return result
|
|
|
|
|
|
def print_cache_listing(verbose: bool = False):
    """Print cache listing to stdout.

    Shows up to 20 refs and 20 content entries (with truncation notes),
    then a summary; verbose=True appends a hint for clearing the cache.
    """
    info = list_cache(verbose)
    cache_dir = get_cache_dir()

    print(f"\nCache directory: {cache_dir}\n")

    # Refs summary
    if info["refs"]:
        print(f"=== Refs ({len(info['refs'])}) ===")
        for ref in info["refs"][:20]:  # Show first 20
            # Suffix with the referenced blob's size when it was resolvable.
            content_info = f" → {ref.get('content_size_str', '?')}" if 'content_size_str' in ref else ""
            print(f" {ref['cache_id'][:16]}... → {ref['cid'][:16]}...{content_info}")
        if len(info["refs"]) > 20:
            print(f" ... and {len(info['refs']) - 20} more")
        print()

    # Content by type
    if info["content"]:
        # Group by first 2 chars (subdirectory)
        print(f"=== Content ({len(info['content'])} items, {info['summary']['total_size_str']}) ===")
        for item in info["content"][:20]:
            print(f" {item['cid'][:16]}... {item['size_str']:>8} {item['mtime'].strftime('%Y-%m-%d %H:%M')}")
        if len(info["content"]) > 20:
            print(f" ... and {len(info['content']) - 20} more")
        print()

    print(f"=== Summary ===")
    print(f" Refs: {info['summary']['total_refs']}")
    print(f" Content: {info['summary']['total_content']} ({info['summary']['total_size_str']})")

    if verbose:
        print(f"\nTo clear cache: rm -rf {cache_dir}/*")
|
|
|
|
|
|
if __name__ == "__main__":
    import sys
    # CLI entry point: print the cache listing; -v/--verbose adds hints.
    verbose = "-v" in sys.argv or "--verbose" in sys.argv
    print_cache_listing(verbose)
|