Replace batch DAG system with streaming architecture

- Remove legacy_tasks.py, hybrid_state.py, render.py
- Remove old task modules (analyze, execute, execute_sexp, orchestrate)
- Add streaming interpreter from test repo
- Add sexp_effects with primitives and video effects
- Add streaming Celery task with CID-based asset resolution
- Support both CID and friendly name references for assets
- Add .dockerignore to prevent local clones from conflicting

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
giles
2026-02-02 19:10:11 +00:00
parent 270eeb3fcf
commit bb458aa924
107 changed files with 15830 additions and 3211 deletions

303
tasks/streaming.py Normal file
View File

@@ -0,0 +1,303 @@
"""
Streaming video rendering task.
Executes S-expression recipes for frame-by-frame video processing.
Supports CID and friendly name references for assets.
"""
import hashlib
import logging
import os
import sys
import tempfile
from pathlib import Path
from typing import Dict, Optional
from celery import current_task
# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from celery_app import app
from cache_manager import get_cache_manager
logger = logging.getLogger(__name__)
def resolve_asset(ref: str, actor_id: Optional[str] = None) -> Optional[Path]:
    """
    Resolve an asset reference (CID or friendly name) to a file path.

    The reference is first looked up in the cache manager as a CID. If that
    misses and ``actor_id`` is given, the reference is resolved as a friendly
    name through ``database.resolve_friendly_name`` and the resulting CID is
    looked up in the cache instead.

    Args:
        ref: CID or friendly name (e.g., "my-video" or "QmXyz...")
        actor_id: User ID for friendly name resolution

    Returns:
        Path to the asset file, or None if not found
    """
    cache_mgr = get_cache_manager()

    # Try as direct CID first
    path = cache_mgr.get_by_cid(ref)
    if path and path.exists():
        logger.info(f"Resolved {ref[:16]}... as CID to {path}")
        return path

    # Try as friendly name if actor_id provided
    if actor_id:
        import asyncio

        from database import resolve_friendly_name

        try:
            # resolve_friendly_name is awaited on a private, short-lived loop.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                cid = loop.run_until_complete(resolve_friendly_name(actor_id, ref))
            finally:
                # Uninstall the loop before closing it so that a closed loop
                # is never left behind as this thread's current event loop.
                asyncio.set_event_loop(None)
                loop.close()

            if cid:
                path = cache_mgr.get_by_cid(cid)
                if path and path.exists():
                    logger.info(f"Resolved '{ref}' via friendly name to {path}")
                    return path
        except Exception as e:
            # Best effort: a failed lookup is reported as "not found" below.
            logger.warning(f"Failed to resolve friendly name '{ref}': {e}")

    logger.warning(f"Could not resolve asset reference: {ref}")
    return None
class CIDVideoSource:
    """
    Video source that resolves CIDs to file paths.

    Wraps the streaming VideoSource to work with cached assets. The wrapped
    source is created lazily: the reference is only resolved the first time a
    frame, the size or the duration is requested.
    """

    def __init__(self, cid: str, fps: float = 30, actor_id: Optional[str] = None):
        self.cid = cid
        self.fps = fps
        self.actor_id = actor_id
        # Wrapped VideoSource; stays None until _ensure_source() opens it.
        self._source = None

    def _ensure_source(self):
        """
        Open the wrapped VideoSource if it is not open yet.

        Raises:
            ValueError: if the reference cannot be resolved to a file.
        """
        if self._source is None:
            path = resolve_asset(self.cid, self.actor_id)
            if not path:
                raise ValueError(f"Could not resolve video source: {self.cid}")
            # Import from primitives where VideoSource is defined
            from sexp_effects.primitive_libs.streaming import VideoSource
            self._source = VideoSource(str(path), self.fps)

    def read_at(self, t: float):
        """Return whatever the wrapped source's ``read_at(t)`` returns."""
        self._ensure_source()
        return self._source.read_at(t)

    def read(self):
        """Return whatever the wrapped source's ``read()`` returns."""
        self._ensure_source()
        return self._source.read()

    @property
    def size(self):
        """The wrapped source's ``size``."""
        self._ensure_source()
        return self._source.size

    @property
    def duration(self):
        """The wrapped source's duration (read from its ``_duration`` attribute)."""
        self._ensure_source()
        return self._source._duration

    def close(self):
        """
        Close the wrapped source if one was opened.

        The handle is dropped first, so calling close() again is a no-op and
        a later read re-opens the source instead of using a closed one.
        """
        if self._source is not None:
            source, self._source = self._source, None
            source.close()
class CIDAudioAnalyzer:
    """
    Audio analyzer that resolves CIDs to file paths.

    The wrapped AudioAnalyzer is built on first use; every query method
    forwards to it unchanged.
    """

    def __init__(self, cid: str, actor_id: Optional[str] = None):
        self.cid = cid
        self.actor_id = actor_id
        # Wrapped AudioAnalyzer; None until the first query.
        self._analyzer = None

    def _ensure_analyzer(self):
        """Build the wrapped analyzer once; raise ValueError if unresolvable."""
        if self._analyzer is not None:
            return
        resolved = resolve_asset(self.cid, self.actor_id)
        if not resolved:
            raise ValueError(f"Could not resolve audio source: {self.cid}")
        from sexp_effects.primitive_libs.streaming import AudioAnalyzer
        self._analyzer = AudioAnalyzer(str(resolved))

    def get_energy(self, t: float) -> float:
        """Forward to the wrapped analyzer's ``get_energy(t)``."""
        self._ensure_analyzer()
        backend = self._analyzer
        return backend.get_energy(t)

    def get_beat(self, t: float) -> bool:
        """Forward to the wrapped analyzer's ``get_beat(t)``."""
        self._ensure_analyzer()
        backend = self._analyzer
        return backend.get_beat(t)

    def get_beat_count(self, t: float) -> int:
        """Forward to the wrapped analyzer's ``get_beat_count(t)``."""
        self._ensure_analyzer()
        backend = self._analyzer
        return backend.get_beat_count(t)

    @property
    def duration(self):
        """The wrapped analyzer's ``duration``."""
        self._ensure_analyzer()
        backend = self._analyzer
        return backend.duration
def create_cid_primitives(actor_id: Optional[str] = None):
    """
    Build the CID-aware source primitives for one actor.

    Returns a dict mapping primitive names to callables; each callable wraps
    the given reference in a CID-resolving source bound to ``actor_id``.
    """
    def prim_make_audio_analyzer_cid(cid: str):
        return CIDAudioAnalyzer(cid, actor_id)

    def prim_make_video_source_cid(cid: str, fps: float = 30):
        return CIDVideoSource(cid, fps, actor_id)

    primitives = {}
    primitives['streaming:make-video-source'] = prim_make_video_source_cid
    primitives['streaming:make-audio-analyzer'] = prim_make_audio_analyzer_cid
    return primitives
@app.task(bind=True, name='tasks.run_stream')
def run_stream(
    self,
    recipe_sexp: str,
    output_name: str = "output.mp4",
    duration: Optional[float] = None,
    fps: Optional[float] = None,
    actor_id: Optional[str] = None,
    sources_sexp: Optional[str] = None,
    audio_sexp: Optional[str] = None,
) -> dict:
    """
    Execute a streaming S-expression recipe.

    The recipe (and optional config) is written into a temporary work
    directory, rendered with StreamInterpreter, and the rendered file is
    stored through the cache manager. The work directory is always removed.

    Args:
        recipe_sexp: The recipe S-expression content
        output_name: Name for the output file; only its final path component
            is used, so the output always lands inside the work directory
        duration: Optional duration override (seconds)
        fps: Optional FPS override
        actor_id: User ID for friendly name resolution
        sources_sexp: Optional sources config S-expression
        audio_sexp: Optional audio config S-expression

    Returns:
        On success: dict with status "completed", task_id, output_cid,
        ipfs_cid and output_path. On failure: dict with status "failed",
        task_id and error. Errors raised while rendering or caching are
        reported this way rather than propagated.
    """
    import shutil

    task_id = self.request.id
    logger.info(f"Starting stream task {task_id}")
    self.update_state(state='INITIALIZING', meta={'progress': 0})

    # Get the app directory for primitive/effect paths
    app_dir = Path(__file__).parent.parent  # celery/
    sexp_effects_dir = app_dir / "sexp_effects"
    effects_dir = app_dir / "effects"
    templates_dir = app_dir / "templates"

    # Strip any directory part so an absolute path or "../" in output_name
    # cannot place the output outside the work directory.
    safe_name = Path(output_name or "").name
    if safe_name in ("", ".", ".."):
        safe_name = "output.mp4"

    # Create temp directory for work
    work_dir = Path(tempfile.mkdtemp(prefix="stream_"))
    recipe_path = work_dir / "recipe.sexp"
    output_path = work_dir / safe_name

    try:
        # Create symlinks to effect directories so relative paths work.
        # Done inside the try so a failure here still cleans up work_dir.
        (work_dir / "sexp_effects").symlink_to(sexp_effects_dir)
        (work_dir / "effects").symlink_to(effects_dir)
        (work_dir / "templates").symlink_to(templates_dir)

        # Write recipe to temp file
        recipe_path.write_text(recipe_sexp)

        # Write optional config files
        sources_path = None
        if sources_sexp:
            sources_path = work_dir / "sources.sexp"
            sources_path.write_text(sources_sexp)

        audio_path = None
        if audio_sexp:
            audio_path = work_dir / "audio.sexp"
            audio_path.write_text(audio_sexp)

        self.update_state(state='RENDERING', meta={'progress': 5})

        # Import the streaming interpreter
        from streaming.stream_sexp_generic import StreamInterpreter

        # Create interpreter
        interp = StreamInterpreter(str(recipe_path))

        # Set primitive library directory explicitly
        interp.primitive_lib_dir = sexp_effects_dir / "primitive_libs"

        if fps:
            interp.config['fps'] = fps
        if sources_path:
            interp.sources_config = sources_path
        if audio_path:
            interp.audio_config = audio_path

        # Override primitives with CID-aware versions
        cid_prims = create_cid_primitives(actor_id)
        interp.primitives.update(cid_prims)

        # Run rendering to file
        logger.info(f"Rendering to {output_path}")
        interp.run(duration=duration, output=str(output_path))

        self.update_state(state='CACHING', meta={'progress': 90})

        # A missing file means the interpreter produced nothing to cache.
        if not output_path.exists():
            return {
                "status": "failed",
                "task_id": task_id,
                "error": "Output file not created",
            }

        # Store output in cache
        cache_mgr = get_cache_manager()
        cached_file, ipfs_cid = cache_mgr.put(
            source_path=output_path,
            node_type="STREAM_OUTPUT",
            node_id=f"stream_{task_id}",
        )

        logger.info(f"Stream output cached: CID={cached_file.cid}, IPFS={ipfs_cid}")

        return {
            "status": "completed",
            "task_id": task_id,
            "output_cid": cached_file.cid,
            "ipfs_cid": ipfs_cid,
            "output_path": str(cached_file.path),
        }

    except Exception as e:
        # logger.exception records the traceback through the logging system
        # instead of printing it to stderr.
        logger.exception(f"Stream task {task_id} failed: {e}")
        return {
            "status": "failed",
            "task_id": task_id,
            "error": str(e),
        }

    finally:
        # Cleanup temp directory
        shutil.rmtree(work_dir, ignore_errors=True)