Files
rose-ash/l1/tasks/streaming.py
2026-02-24 23:07:19 +00:00

725 lines
29 KiB
Python

"""
Streaming video rendering task.
Executes S-expression recipes for frame-by-frame video processing.
Supports CID and friendly name references for assets.
Supports pause/resume/restart for long renders.
"""
import hashlib
import logging
import os
import signal
import sys
import tempfile
from pathlib import Path
from typing import Dict, Optional
from celery import current_task
# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from celery_app import app
from cache_manager import get_cache_manager
logger = logging.getLogger(__name__)
class PauseRequested(Exception):
    """Signals that the user asked to pause the render (delivered via SIGTERM)."""
# Log module load at DEBUG level (was a bare stderr print left over from
# debugging, which fired on every worker import).
logger.debug("tasks/streaming.py loaded at %s", __file__)

# Module-level asyncio event loop and init flag, reused for synchronous DB
# calls from Celery task context (created lazily; see run_stream and its
# callbacks). Keeping one loop per worker avoids rebinding the async DB pool.
_resolve_loop = None
_db_initialized = False
def resolve_asset(ref: str, actor_id: Optional[str] = None) -> Optional[Path]:
    """
    Resolve an asset reference (CID or friendly name) to a file path.

    Resolution order:
      1. Treat ``ref`` as a direct CID in the local cache.
      2. If ``actor_id`` is given, treat ``ref`` as a friendly name, map it to
         a CID via the database, and check the local cache again.
      3. If the content is not cached locally, fetch it from IPFS and cache it.

    Args:
        ref: CID or friendly name (e.g., "my-video" or "QmXyz...")
        actor_id: User ID for friendly name resolution

    Returns:
        Path to the asset file, or None if not found
    """
    cache_mgr = get_cache_manager()
    # Try as direct CID first
    path = cache_mgr.get_by_cid(ref)
    if path and path.exists():
        logger.info(f"Resolved {ref[:16]}... as CID to {path}")
        return path
    # Try as friendly name if actor_id provided
    if actor_id:
        from database import resolve_friendly_name_sync, get_ipfs_cid_sync
        try:
            # Use synchronous database functions to avoid event loop issues
            cid = resolve_friendly_name_sync(actor_id, ref)
            if cid:
                path = cache_mgr.get_by_cid(cid)
                if path and path.exists():
                    logger.info(f"Resolved '{ref}' via friendly name to {path}")
                    return path
                # File not in local cache - look up IPFS CID and fetch.
                # The cid from friendly_names is internal; the IPFS CID lives
                # in cache_items.
                ipfs_cid = get_ipfs_cid_sync(cid)
                if not ipfs_cid or ipfs_cid == cid:
                    # No separate IPFS CID, try using the cid directly (might be IPFS CID)
                    ipfs_cid = cid
                logger.debug("resolve_asset: %s not local, fetching %s from IPFS", ref, ipfs_cid)
                import ipfs_client
                content = ipfs_client.get_bytes(ipfs_cid, use_gateway_fallback=True)
                if content:
                    # Write to a temp file so the cache manager can ingest it.
                    # tempfile/Path are module-level imports; no local re-import.
                    with tempfile.NamedTemporaryFile(delete=False, suffix='.sexp') as tmp:
                        tmp.write(content)
                        tmp_path = Path(tmp.name)
                    try:
                        # Store in cache (skip re-uploading to IPFS; we just fetched it)
                        cached_file, _ = cache_mgr.put(tmp_path, node_type="effect", skip_ipfs=True)
                    finally:
                        # The cache keeps its own copy; drop the temp file
                        # (previously leaked on every IPFS fetch).
                        tmp_path.unlink(missing_ok=True)
                    # Index for future lookups.
                    # NOTE(review): the original comment said "index by IPFS CID"
                    # but the internal `cid` is passed here - confirm which key
                    # _set_content_index expects.
                    cache_mgr._set_content_index(cid, cached_file.cid)
                    logger.info(f"Fetched '{ref}' from IPFS and cached at {cached_file.path}")
                    return cached_file.path
                logger.warning(f"IPFS fetch failed for {cid}")
        except Exception as e:
            logger.warning(f"Failed to resolve friendly name '{ref}': {e}")
    logger.warning(f"Could not resolve asset reference: {ref}")
    return None
class CIDVideoSource:
    """
    Video source that resolves CIDs to file paths.

    Wraps the streaming VideoSource to work with cached assets. Resolution and
    decoder construction are deferred until the first read/property access, so
    constructing this object is cheap and side-effect free.
    """
    def __init__(self, cid: str, fps: float = 30, actor_id: Optional[str] = None):
        self.cid = cid            # CID or friendly name of the video asset
        self.fps = fps            # target frames per second for reads
        self.actor_id = actor_id  # owner, used for friendly-name resolution
        self._source = None       # lazily-created underlying video source

    def _ensure_source(self):
        """Resolve the reference and build the underlying source on first use.

        Raises:
            ValueError: if the CID/friendly name cannot be resolved to a file.
        """
        if self._source is None:
            logger.info(f"CIDVideoSource._ensure_source: resolving cid={self.cid} with actor_id={self.actor_id}")
            path = resolve_asset(self.cid, self.actor_id)
            if not path:
                raise ValueError(f"Could not resolve video source '{self.cid}' for actor_id={self.actor_id}")
            logger.info(f"CIDVideoSource._ensure_source: resolved to path={path}")
            # Use GPU-accelerated video source if available
            try:
                from sexp_effects.primitive_libs.streaming_gpu import GPUVideoSource, GPU_AVAILABLE
                if GPU_AVAILABLE:
                    logger.info(f"CIDVideoSource: using GPUVideoSource for {path}")
                    self._source = GPUVideoSource(str(path), self.fps, prefer_gpu=True)
                else:
                    raise ImportError("GPU not available")
            except Exception as e:
                # Covers ImportError (no GPU lib / GPU unavailable) and any GPU
                # init failure; the original `(ImportError, Exception)` tuple was
                # redundant since Exception already subsumes ImportError.
                logger.info(f"CIDVideoSource: falling back to CPU VideoSource ({e})")
                from sexp_effects.primitive_libs.streaming import VideoSource
                self._source = VideoSource(str(path), self.fps)

    def read_at(self, t: float):
        """Read the frame at time t (seconds)."""
        self._ensure_source()
        return self._source.read_at(t)

    def read(self):
        """Read the next frame sequentially."""
        self._ensure_source()
        return self._source.read()

    @property
    def size(self):
        """(width, height) of the underlying source - TODO confirm shape."""
        self._ensure_source()
        return self._source.size

    @property
    def duration(self):
        # NOTE(review): reaches into the wrapped source's private _duration;
        # confirm no public accessor exists on VideoSource/GPUVideoSource.
        self._ensure_source()
        return self._source._duration

    @property
    def path(self):
        """Filesystem path of the resolved asset."""
        self._ensure_source()
        return self._source.path

    @property
    def _stream_time(self):
        # Exposes the wrapped source's private stream clock for the interpreter.
        self._ensure_source()
        return self._source._stream_time

    def skip(self):
        """Skip a frame in the underlying source."""
        self._ensure_source()
        return self._source.skip()

    def close(self):
        """Release the underlying source if it was ever created (no-op otherwise)."""
        if self._source:
            self._source.close()
class CIDAudioAnalyzer:
    """
    Audio analyzer that resolves CIDs to file paths.

    The wrapped AudioAnalyzer is built lazily, on the first query.
    """
    def __init__(self, cid: str, actor_id: Optional[str] = None):
        self.cid = cid            # CID or friendly name of the audio asset
        self.actor_id = actor_id  # owner, for friendly-name resolution
        self._analyzer = None     # lazily-created AudioAnalyzer

    def _ensure_analyzer(self):
        """Resolve the reference and construct the analyzer exactly once."""
        if self._analyzer is not None:
            return
        path = resolve_asset(self.cid, self.actor_id)
        if not path:
            raise ValueError(f"Could not resolve audio source: {self.cid}")
        from sexp_effects.primitive_libs.streaming import AudioAnalyzer
        self._analyzer = AudioAnalyzer(str(path))

    def get_energy(self, t: float) -> float:
        """Audio energy at time t (seconds)."""
        self._ensure_analyzer()
        return self._analyzer.get_energy(t)

    def get_beat(self, t: float) -> bool:
        """Whether a beat occurs at time t."""
        self._ensure_analyzer()
        return self._analyzer.get_beat(t)

    def get_beat_count(self, t: float) -> int:
        """Number of beats up to time t."""
        self._ensure_analyzer()
        return self._analyzer.get_beat_count(t)

    @property
    def duration(self):
        """Duration reported by the wrapped analyzer."""
        self._ensure_analyzer()
        return self._analyzer.duration
def create_cid_primitives(actor_id: Optional[str] = None):
    """
    Create CID-aware primitive functions.

    The returned primitives shadow the stock streaming primitives so that
    recipes can refer to assets by CID or friendly name; actual resolution
    happens lazily inside CIDVideoSource / CIDAudioAnalyzer.

    Args:
        actor_id: User ID used for friendly-name resolution by the sources.

    Returns:
        Dict of primitive name -> factory that resolves CIDs before creating
        sources.
    """
    from celery.utils.log import get_task_logger
    cid_logger = get_task_logger(__name__)

    def prim_make_video_source_cid(cid: str, fps: float = 30):
        # Trace at DEBUG level (was logged at WARNING with a "DEBUG:" prefix,
        # which spammed worker logs on every primitive call).
        cid_logger.debug(f"CID-aware make-video-source: cid={cid}, actor_id={actor_id}")
        return CIDVideoSource(cid, fps, actor_id)

    def prim_make_audio_analyzer_cid(cid: str):
        cid_logger.debug(f"CID-aware make-audio-analyzer: cid={cid}, actor_id={actor_id}")
        return CIDAudioAnalyzer(cid, actor_id)

    return {
        'streaming:make-video-source': prim_make_video_source_cid,
        'streaming:make-audio-analyzer': prim_make_audio_analyzer_cid,
    }
@app.task(bind=True, name='tasks.run_stream')
def run_stream(
    self,
    run_id: str,
    recipe_sexp: str,
    output_name: str = "output.mp4",
    duration: Optional[float] = None,
    fps: Optional[float] = None,
    actor_id: Optional[str] = None,
    sources_sexp: Optional[str] = None,
    audio_sexp: Optional[str] = None,
    resume: bool = False,
) -> dict:
    """
    Execute a streaming S-expression recipe.

    Renders frame-by-frame to an IPFS HLS output, saving checkpoints at
    segment boundaries so the render can be paused (via SIGTERM) and resumed.

    Args:
        run_id: The run ID for database tracking
        recipe_sexp: The recipe S-expression content
        output_name: Name for the output file
        duration: Optional duration override (seconds)
        fps: Optional FPS override
        actor_id: User ID for friendly name resolution
        sources_sexp: Optional sources config S-expression
        audio_sexp: Optional audio config S-expression
        resume: If True, load checkpoint and resume from where we left off

    Returns:
        Dict with output_cid, output_path, and status
    """
    global _resolve_loop, _db_initialized
    task_id = self.request.id
    logger.info(f"Starting stream task {task_id} for run {run_id} (resume={resume})")
    # Handle graceful pause (SIGTERM from Celery revoke).  The flag is only
    # acted on at checkpoint boundaries (see on_checkpoint below), so we never
    # stop mid-segment.
    pause_requested = False
    original_sigterm = signal.getsignal(signal.SIGTERM)
    def handle_sigterm(signum, frame):
        nonlocal pause_requested
        pause_requested = True
        logger.info(f"Pause requested for run {run_id} (SIGTERM received)")
    signal.signal(signal.SIGTERM, handle_sigterm)
    self.update_state(state='INITIALIZING', meta={'progress': 0})
    # Get the app directory for primitive/effect paths
    app_dir = Path(__file__).parent.parent # celery/
    sexp_effects_dir = app_dir / "sexp_effects"
    effects_dir = app_dir / "effects"
    templates_dir = app_dir / "templates"
    # Create temp directory for work
    work_dir = Path(tempfile.mkdtemp(prefix="stream_"))
    recipe_path = work_dir / "recipe.sexp"
    # Write output to shared cache for live streaming access
    cache_dir = Path(os.environ.get("CACHE_DIR", "/data/cache"))
    stream_dir = cache_dir / "streaming" / run_id
    stream_dir.mkdir(parents=True, exist_ok=True)
    # Use IPFS HLS output for distributed streaming - segments uploaded to IPFS
    output_path = str(stream_dir) + "/ipfs-hls" # /ipfs-hls suffix triggers IPFS HLS mode
    # Create symlinks to effect directories so relative paths work.
    # NOTE(review): these symlink_to calls run before the try block; if one
    # raises (e.g. link already exists on a retried task), work_dir is never
    # cleaned up by the finally below - confirm whether retries can hit this.
    (work_dir / "sexp_effects").symlink_to(sexp_effects_dir)
    (work_dir / "effects").symlink_to(effects_dir)
    (work_dir / "templates").symlink_to(templates_dir)
    try:
        # Write recipe to temp file
        recipe_path.write_text(recipe_sexp)
        # Write optional config files
        sources_path = None
        if sources_sexp:
            sources_path = work_dir / "sources.sexp"
            sources_path.write_text(sources_sexp)
        audio_path = None
        if audio_sexp:
            audio_path = work_dir / "audio.sexp"
            audio_path.write_text(audio_sexp)
        self.update_state(state='RENDERING', meta={'progress': 5})
        # Import the streaming interpreter
        from streaming.stream_sexp_generic import StreamInterpreter
        # Load checkpoint if resuming
        checkpoint = None
        if resume:
            import asyncio
            import database
            try:
                # Reuse (or lazily create) the module-level event loop so the
                # async database pool stays bound to a single loop per worker.
                if _resolve_loop is None or _resolve_loop.is_closed():
                    _resolve_loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(_resolve_loop)
                    _db_initialized = False
                if not _db_initialized:
                    _resolve_loop.run_until_complete(database.init_db())
                    _db_initialized = True
                checkpoint = _resolve_loop.run_until_complete(database.get_run_checkpoint(run_id))
                if checkpoint:
                    logger.info(f"Loaded checkpoint for run {run_id}: frame {checkpoint.get('frame_num')}")
                else:
                    logger.warning(f"No checkpoint found for run {run_id}, starting from beginning")
            except Exception as e:
                # A missing/broken checkpoint degrades to a fresh render.
                logger.error(f"Failed to load checkpoint: {e}")
                checkpoint = None
        # Create interpreter (pass actor_id for friendly name resolution)
        interp = StreamInterpreter(str(recipe_path), actor_id=actor_id)
        # Set primitive library directory explicitly
        interp.primitive_lib_dir = sexp_effects_dir / "primitive_libs"
        if fps:
            interp.config['fps'] = fps
        if sources_path:
            interp.sources_config = sources_path
        if audio_path:
            interp.audio_config = audio_path
        # Override primitives with CID-aware versions
        cid_prims = create_cid_primitives(actor_id)
        from celery.utils.log import get_task_logger
        task_logger = get_task_logger(__name__)
        task_logger.warning(f"DEBUG: Overriding primitives: {list(cid_prims.keys())}")
        task_logger.warning(f"DEBUG: Primitives before: {list(interp.primitives.keys())[:10]}...")
        interp.primitives.update(cid_prims)
        task_logger.warning(f"DEBUG: streaming:make-video-source is now: {type(interp.primitives.get('streaming:make-video-source'))}")
        # Set up callback to update database when IPFS playlist is created (for live HLS redirect)
        def on_playlist_update(playlist_cid, quality_playlists=None):
            """Update database with playlist CID and quality info.
            Args:
                playlist_cid: Master playlist CID
                quality_playlists: Dict of quality name -> {cid, width, height, bitrate}
            """
            global _resolve_loop, _db_initialized
            import asyncio
            import database
            try:
                if _resolve_loop is None or _resolve_loop.is_closed():
                    _resolve_loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(_resolve_loop)
                    _db_initialized = False
                if not _db_initialized:
                    _resolve_loop.run_until_complete(database.init_db())
                    _db_initialized = True
                _resolve_loop.run_until_complete(database.update_pending_run_playlist(run_id, playlist_cid, quality_playlists))
                logger.info(f"Updated pending run {run_id} with IPFS playlist: {playlist_cid}, qualities: {list(quality_playlists.keys()) if quality_playlists else []}")
            except Exception as e:
                # Best-effort: a failed playlist update must not kill the render.
                logger.error(f"Failed to update playlist CID in database: {e}")
        interp.on_playlist_update = on_playlist_update
        # Set up progress callback to update Celery task state
        def on_progress(pct, frame_num, total_frames):
            nonlocal pause_requested
            # Scale progress: 5% (start) to 85% (before caching)
            scaled_progress = 5 + (pct * 0.8) # 5% to 85%
            self.update_state(state='RENDERING', meta={
                'progress': scaled_progress,
                'frame': frame_num,
                'total_frames': total_frames,
                'percent': pct,
            })
        interp.on_progress = on_progress
        # Set up checkpoint callback to save state at segment boundaries
        def on_checkpoint(ckpt):
            """Save checkpoint state to database."""
            nonlocal pause_requested
            global _resolve_loop, _db_initialized
            import asyncio
            import database
            try:
                if _resolve_loop is None or _resolve_loop.is_closed():
                    _resolve_loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(_resolve_loop)
                    _db_initialized = False
                if not _db_initialized:
                    _resolve_loop.run_until_complete(database.init_db())
                    _db_initialized = True
                # Get total frames from interpreter config
                total_frames = None
                if hasattr(interp, 'output') and hasattr(interp.output, '_frame_count'):
                    # Estimate total frames based on duration of the first
                    # global that exposes a duration (presumably a source).
                    fps_val = interp.config.get('fps', 30)
                    for name, val in interp.globals.items():
                        if hasattr(val, 'duration'):
                            total_frames = int(val.duration * fps_val)
                            break
                _resolve_loop.run_until_complete(database.update_pending_run_checkpoint(
                    run_id=run_id,
                    checkpoint_frame=ckpt['frame_num'],
                    checkpoint_t=ckpt['t'],
                    checkpoint_scans=ckpt.get('scans'),
                    total_frames=total_frames,
                ))
                logger.info(f"Saved checkpoint for run {run_id}: frame {ckpt['frame_num']}")
                # Check if pause was requested after checkpoint; raising here
                # is how a SIGTERM actually stops interp.run (see below).
                if pause_requested:
                    logger.info(f"Pause requested after checkpoint, raising PauseRequested")
                    raise PauseRequested("Render paused by user")
            except PauseRequested:
                raise # Re-raise to stop the render
            except Exception as e:
                # Checkpoint failures are non-fatal; the render continues.
                logger.error(f"Failed to save checkpoint: {e}")
        interp.on_checkpoint = on_checkpoint
        # Build resume state for the interpreter (includes segment CIDs for output)
        resume_from = None
        if checkpoint:
            resume_from = {
                'frame_num': checkpoint.get('frame_num'),
                't': checkpoint.get('t'),
                'scans': checkpoint.get('scans', {}),
            }
        # Add segment CIDs if available (from quality_playlists in checkpoint)
        # Note: We need to extract segment_cids from the output's state, which isn't
        # directly stored. For now, the output will re-check existing segments on disk.
        # Run rendering to file
        logger.info(f"Rendering to {output_path}" + (f" (resuming from frame {resume_from['frame_num']})" if resume_from else ""))
        render_paused = False
        try:
            interp.run(duration=duration, output=str(output_path), resume_from=resume_from)
        except PauseRequested:
            # Graceful pause - checkpoint already saved
            render_paused = True
            logger.info(f"Render paused for run {run_id}")
        # Restore original signal handler
        signal.signal(signal.SIGTERM, original_sigterm)
        if render_paused:
            import asyncio
            import database
            try:
                if _resolve_loop is None or _resolve_loop.is_closed():
                    _resolve_loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(_resolve_loop)
                    _db_initialized = False
                if not _db_initialized:
                    _resolve_loop.run_until_complete(database.init_db())
                    _db_initialized = True
                _resolve_loop.run_until_complete(database.update_pending_run_status(run_id, 'paused'))
            except Exception as e:
                logger.error(f"Failed to update status to paused: {e}")
            # Early return: paused runs are resumed by a later task invocation.
            return {"status": "paused", "run_id": run_id, "task_id": task_id}
        # Check for interpreter errors
        if interp.errors:
            error_msg = f"Rendering failed with {len(interp.errors)} errors: {interp.errors[0]}"
            raise RuntimeError(error_msg)
        self.update_state(state='CACHING', meta={'progress': 90})
        # Get IPFS playlist CID if available (from IPFSHLSOutput)
        ipfs_playlist_cid = None
        ipfs_playlist_url = None
        segment_cids = {}
        if hasattr(interp, 'output') and hasattr(interp.output, 'playlist_cid'):
            ipfs_playlist_cid = interp.output.playlist_cid
            ipfs_playlist_url = interp.output.playlist_url
            segment_cids = getattr(interp.output, 'segment_cids', {})
            logger.info(f"IPFS HLS: playlist={ipfs_playlist_cid}, segments={len(segment_cids)}")
        # Update pending run with playlist CID for live HLS redirect
        if ipfs_playlist_cid:
            import asyncio
            import database
            try:
                # NOTE(review): unlike the other loop-setup blocks in this task,
                # this one does NOT reset _db_initialized when creating a fresh
                # loop - confirm whether that is intentional.
                if _resolve_loop is None or _resolve_loop.is_closed():
                    _resolve_loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(_resolve_loop)
                if not _db_initialized:
                    _resolve_loop.run_until_complete(database.init_db())
                    _db_initialized = True
                _resolve_loop.run_until_complete(database.update_pending_run_playlist(run_id, ipfs_playlist_cid))
                logger.info(f"Updated pending run {run_id} with IPFS playlist CID: {ipfs_playlist_cid}")
            except Exception as e:
                logger.error(f"Failed to update pending run with playlist CID: {e}")
                raise # Fail fast - database errors should not be silently ignored
        # HLS output creates playlist and segments
        # - Single-res: stream_dir/stream.m3u8 and stream_dir/segment_*.ts
        # - Multi-res: stream_dir/original/playlist.m3u8 and stream_dir/original/segment_*.ts
        hls_playlist = stream_dir / "stream.m3u8"
        if not hls_playlist.exists():
            # Try multi-res output path
            hls_playlist = stream_dir / "original" / "playlist.m3u8"
        # Validate HLS output (must have playlist and at least one segment)
        if not hls_playlist.exists():
            raise RuntimeError("HLS playlist not created - rendering likely failed")
        segments = list(stream_dir.glob("segment_*.ts"))
        if not segments:
            # Try multi-res output path
            segments = list(stream_dir.glob("original/segment_*.ts"))
        if not segments:
            raise RuntimeError("No HLS segments created - rendering likely failed")
        logger.info(f"HLS rendering complete: {len(segments)} segments created, IPFS playlist: {ipfs_playlist_cid}")
        # Mux HLS segments into a single MP4 for persistent cache storage
        final_mp4 = stream_dir / "output.mp4"
        import subprocess
        mux_cmd = [
            "ffmpeg", "-y",
            "-i", str(hls_playlist),
            "-c", "copy", # Just copy streams, no re-encoding
            "-movflags", "+faststart", # Move moov atom to start for web playback
            "-fflags", "+genpts", # Generate proper timestamps
            str(final_mp4)
        ]
        logger.info(f"Muxing HLS to MP4: {' '.join(mux_cmd)}")
        result = subprocess.run(mux_cmd, capture_output=True, text=True)
        if result.returncode != 0:
            logger.warning(f"HLS mux failed: {result.stderr}")
            # Fall back to using the first segment for caching
            final_mp4 = segments[0]
        # Store output in cache
        if final_mp4.exists():
            cache_mgr = get_cache_manager()
            cached_file, ipfs_cid = cache_mgr.put(
                source_path=final_mp4,
                node_type="STREAM_OUTPUT",
                node_id=f"stream_{task_id}",
            )
            logger.info(f"Stream output cached: CID={cached_file.cid}, IPFS={ipfs_cid}")
            # Save to database - reuse the module-level loop to avoid pool conflicts
            import asyncio
            import database
            try:
                # Reuse or create event loop
                if _resolve_loop is None or _resolve_loop.is_closed():
                    _resolve_loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(_resolve_loop)
                    _db_initialized = False
                # Initialize database pool if needed
                if not _db_initialized:
                    _resolve_loop.run_until_complete(database.init_db())
                    _db_initialized = True
                # Get recipe CID from pending_run
                pending = _resolve_loop.run_until_complete(database.get_pending_run(run_id))
                recipe_cid = pending.get("recipe", "streaming") if pending else "streaming"
                # Save to run_cache for completed runs
                logger.info(f"Saving run {run_id} to run_cache with actor_id={actor_id}")
                _resolve_loop.run_until_complete(database.save_run_cache(
                    run_id=run_id,
                    output_cid=cached_file.cid,
                    recipe=recipe_cid,
                    inputs=[],
                    ipfs_cid=ipfs_cid,
                    actor_id=actor_id,
                ))
                # Register output as video type so frontend displays it correctly
                _resolve_loop.run_until_complete(database.add_item_type(
                    cid=cached_file.cid,
                    actor_id=actor_id,
                    item_type="video",
                    path=str(cached_file.path),
                    description=f"Stream output from run {run_id}",
                ))
                logger.info(f"Registered output {cached_file.cid} as video type")
                # Update pending run status
                _resolve_loop.run_until_complete(database.update_pending_run_status(
                    run_id=run_id,
                    status="completed",
                ))
                logger.info(f"Saved run {run_id} to database with actor_id={actor_id}")
            except Exception as db_err:
                # Unlike the "failed" paths below, a DB error here fails the
                # task: a completed render that isn't recorded is unrecoverable.
                logger.error(f"Failed to save run to database: {db_err}")
                raise RuntimeError(f"Database error saving run {run_id}: {db_err}") from db_err
            return {
                "status": "completed",
                "run_id": run_id,
                "task_id": task_id,
                "output_cid": cached_file.cid,
                "ipfs_cid": ipfs_cid,
                "output_path": str(cached_file.path),
                # IPFS HLS streaming info
                "ipfs_playlist_cid": ipfs_playlist_cid,
                "ipfs_playlist_url": ipfs_playlist_url,
                "ipfs_segment_count": len(segment_cids),
            }
        else:
            # Update pending run status to failed - reuse module loop
            import asyncio
            import database
            try:
                if _resolve_loop is None or _resolve_loop.is_closed():
                    _resolve_loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(_resolve_loop)
                    _db_initialized = False
                if not _db_initialized:
                    _resolve_loop.run_until_complete(database.init_db())
                    _db_initialized = True
                _resolve_loop.run_until_complete(database.update_pending_run_status(
                    run_id=run_id,
                    status="failed",
                    error="Output file not created",
                ))
            except Exception as db_err:
                logger.warning(f"Failed to update run status: {db_err}")
            return {
                "status": "failed",
                "run_id": run_id,
                "task_id": task_id,
                "error": "Output file not created",
            }
    except Exception as e:
        logger.error(f"Stream task {task_id} failed: {e}")
        import traceback
        traceback.print_exc()
        # Update pending run status to failed - reuse module loop
        import asyncio
        import database
        try:
            if _resolve_loop is None or _resolve_loop.is_closed():
                _resolve_loop = asyncio.new_event_loop()
                asyncio.set_event_loop(_resolve_loop)
                _db_initialized = False
            if not _db_initialized:
                _resolve_loop.run_until_complete(database.init_db())
                _db_initialized = True
            _resolve_loop.run_until_complete(database.update_pending_run_status(
                run_id=run_id,
                status="failed",
                error=str(e),
            ))
        except Exception as db_err:
            logger.warning(f"Failed to update run status: {db_err}")
        # Failure is reported via the return payload, not a raised exception.
        return {
            "status": "failed",
            "run_id": run_id,
            "task_id": task_id,
            "error": str(e),
        }
    finally:
        # Cleanup temp directory only - NOT the streaming directory!
        # The streaming directory contains HLS segments that may still be uploading
        # to IPFS. Deleting it prematurely causes upload failures and missing segments.
        #
        # stream_dir cleanup should happen via:
        # 1. A separate cleanup task that runs after confirming IPFS uploads succeeded
        # 2. Or a periodic cleanup job that removes old streaming dirs
        import shutil
        if work_dir.exists():
            shutil.rmtree(work_dir, ignore_errors=True)
        # NOTE: stream_dir is intentionally NOT deleted here to allow IPFS uploads to complete
        # TODO: Implement a deferred cleanup mechanism for stream_dir after IPFS confirmation