Add dev infrastructure improvements
Some checks are pending
GPU Worker CI/CD / test (push) Waiting to run
GPU Worker CI/CD / deploy (push) Blocked by required conditions

- Central config with logging on startup
- Hot reload support for GPU worker (docker-compose.gpu-dev.yml)
- Quick deploy script (scripts/gpu-dev-deploy.sh)
- GPU/CPU frame compatibility tests
- CI/CD pipeline for GPU worker (.gitea/workflows/gpu-worker.yml)
- Standardize GPU_PERSIST default to 0 across all modules

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
giles
2026-02-03 21:56:40 +00:00
parent 6ea39d633b
commit fe6730ce72
9 changed files with 383 additions and 6 deletions

View File

@@ -0,0 +1,74 @@
# CI/CD for the GPU worker: run CPU-side frame-compatibility tests on every
# push that touches worker code, then rsync + rebuild on the GPU node.
name: GPU Worker CI/CD

on:
  push:
    branches: [main]
    paths:
      - 'sexp_effects/**'
      - 'streaming/**'
      - 'tasks/**'
      - 'Dockerfile.gpu'
      - 'requirements.txt'

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install pytest
      - name: Run frame compatibility tests
        # GPU-only tests self-skip via pytest.mark.skipif when no GPU is
        # present, so no ignore-glob is needed.  The previous `|| true`
        # meant this job could never fail, which made the `needs: test`
        # gate on deploy meaningless -- CPU test failures must fail CI.
        run: |
          pytest tests/test_frame_compatibility.py -v

  deploy:
    needs: test
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    steps:
      - uses: actions/checkout@v3
      - name: Deploy to GPU node
        env:
          GPU_HOST: ${{ secrets.GPU_HOST }}
          SSH_KEY: ${{ secrets.GPU_SSH_KEY }}
        run: |
          # Set up SSH
          mkdir -p ~/.ssh
          echo "$SSH_KEY" > ~/.ssh/id_rsa
          chmod 600 ~/.ssh/id_rsa
          ssh-keyscan -H ${GPU_HOST#*@} >> ~/.ssh/known_hosts 2>/dev/null || true
          # Sync code
          rsync -avz --delete \
            --exclude '.git' \
            --exclude '__pycache__' \
            --exclude '*.pyc' \
            --exclude '.pytest_cache' \
            ./ "$GPU_HOST:/root/art-dag/celery/"
          # Build the new image, then kill the running worker; the container
          # is expected to be restarted on the new image by its restart
          # policy / swarm service.  TODO(review): confirm a restart policy
          # exists -- otherwise the worker stays down after the kill.
          ssh "$GPU_HOST" "
            cd /root/art-dag/celery
            docker build -t git.rose-ash.com/art-dag/l1-gpu-server:latest -f Dockerfile.gpu .
            docker kill \$(docker ps -q -f name=l1-gpu-worker) 2>/dev/null || true
            echo 'GPU worker restarted'
          "
      - name: Verify deployment
        env:
          GPU_HOST: ${{ secrets.GPU_HOST }}
          SSH_KEY: ${{ secrets.GPU_SSH_KEY }}
        # The SSH key/known_hosts written in the previous step persist for
        # the rest of this job, so no re-setup is needed here.
        run: |
          sleep 15
          ssh "$GPU_HOST" "docker logs --tail 20 \$(docker ps -q -f name=l1-gpu-worker)"

View File

@@ -2,9 +2,11 @@
L1 Server Configuration. L1 Server Configuration.
Environment-based configuration with sensible defaults. Environment-based configuration with sensible defaults.
All config should go through this module - no direct os.environ calls elsewhere.
""" """
import os import os
import sys
from pathlib import Path from pathlib import Path
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@@ -52,6 +54,16 @@ class Settings:
default_factory=lambda: os.environ.get("L2_DOMAIN") default_factory=lambda: os.environ.get("L2_DOMAIN")
) )
# GPU/Streaming settings
streaming_gpu_persist: bool = field(
default_factory=lambda: os.environ.get("STREAMING_GPU_PERSIST", "0") == "1"
)
ipfs_gateways: str = field(
default_factory=lambda: os.environ.get(
"IPFS_GATEWAYS", "https://ipfs.io,https://cloudflare-ipfs.com,https://dweb.link"
)
)
# Derived paths # Derived paths
@property @property
def plan_cache_dir(self) -> Path: def plan_cache_dir(self) -> Path:
@@ -68,5 +80,26 @@ class Settings:
self.analysis_cache_dir.mkdir(parents=True, exist_ok=True) self.analysis_cache_dir.mkdir(parents=True, exist_ok=True)
def log_config(self, logger=None) -> None:
    """Log all configuration values for debugging.

    Args:
        logger: Optional logger instance.  When given, each line goes
            through ``logger.info``; otherwise lines are printed to stderr
            so the dump is visible even without logging configured.
    """
    # Route every line through a single callable so both output paths
    # (logger vs. stderr) share the exact same formatting below.
    output = logger.info if logger else lambda x: print(x, file=sys.stderr)
    output("=" * 60)
    output("CONFIGURATION")
    output("=" * 60)
    output(f" cache_dir: {self.cache_dir}")
    output(f" redis_url: {self.redis_url}")
    # Truncated to 50 chars.  NOTE(review): database URLs can embed
    # credentials and 50 chars may still expose them -- consider redacting
    # the password component instead of truncating.
    output(f" database_url: {self.database_url[:50]}...")
    output(f" ipfs_api: {self.ipfs_api}")
    output(f" ipfs_gateway_url: {self.ipfs_gateway_url}")
    # Gateway list is comma-separated and can be long; show a prefix only.
    output(f" ipfs_gateways: {self.ipfs_gateways[:50]}...")
    output(f" streaming_gpu_persist: {self.streaming_gpu_persist}")
    output(f" l2_server: {self.l2_server}")
    output("=" * 60)
# Singleton settings instance # Singleton settings instance
settings = Settings() settings = Settings()
# Log config on import if DEBUG or SHOW_CONFIG is set
if os.environ.get("DEBUG") or os.environ.get("SHOW_CONFIG"):
settings.log_config()

View File

@@ -6,17 +6,32 @@ Uses S-expression recipes with frame-by-frame processing.
""" """
import os import os
import sys
from celery import Celery from celery import Celery
from celery.signals import worker_ready
REDIS_URL = os.environ.get('REDIS_URL', 'redis://localhost:6379/5') # Use central config
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from app.config import settings
app = Celery( app = Celery(
'art_celery', 'art_celery',
broker=REDIS_URL, broker=settings.redis_url,
backend=REDIS_URL, backend=settings.redis_url,
include=['tasks', 'tasks.streaming', 'tasks.ipfs_upload'] include=['tasks', 'tasks.streaming', 'tasks.ipfs_upload']
) )
@worker_ready.connect
def log_config_on_startup(sender, **kwargs):
    """Dump the effective configuration to stderr once the worker is ready."""
    banner = "=" * 60
    err = sys.stderr
    print(banner, file=err)
    print("WORKER STARTED - CONFIGURATION", file=err)
    print(banner, file=err)
    # Central config module owns the formatting of the value dump.
    settings.log_config()
    print(f"Worker: {sender}", file=err)
    print(banner, file=err)
app.conf.update( app.conf.update(
result_expires=86400 * 7, # 7 days - allow time for recovery after restarts result_expires=86400 * 7, # 7 days - allow time for recovery after restarts
task_serializer='json', task_serializer='json',

View File

@@ -0,0 +1,36 @@
# GPU Worker Development Override
#
# Usage: docker stack deploy -c docker-compose.yml -c docker-compose.gpu-dev.yml celery
# Or for quick testing: docker-compose -f docker-compose.yml -f docker-compose.gpu-dev.yml up l1-gpu-worker
#
# Features:
# - Mounts source code for instant changes (no rebuild needed)
# - Uses watchmedo for auto-reload on file changes
# - Shows config on startup
version: '3.8'

services:
  l1-gpu-worker:
    # Override command to use watchmedo for auto-reload.
    # watchdog is installed at container start (best-effort; failure is
    # tolerated with `|| true`, in which case watchmedo will not exist and
    # the worker will not start -- NOTE(review): verify this failure mode).
    command: >
      sh -c "
      pip install -q watchdog[watchmedo] 2>/dev/null || true;
      echo '=== GPU WORKER DEV MODE ===';
      echo 'Source mounted - changes take effect on restart';
      echo 'Auto-reload enabled via watchmedo';
      env | grep -E 'STREAMING_GPU|IPFS_GATEWAY|REDIS|DATABASE' | sort;
      echo '===========================';
      watchmedo auto-restart --directory=/app --pattern='*.py' --recursive -- \
      celery -A celery_app worker --loglevel=info -E -Q gpu,celery
      "
    environment:
      # Development defaults (can override with .env)
      - STREAMING_GPU_PERSIST=0
      - IPFS_GATEWAY_URL=https://celery-artdag.rose-ash.com/ipfs
      - SHOW_CONFIG=1
    volumes:
      # Mount source code for hot reload.  Mounted read-only: the pip
      # install above writes to site-packages, not /app, so :ro is safe for
      # that -- confirm nothing else writes under /app at runtime.
      - ./:/app:ro
      # Keep cache local
      - gpu_cache:/data/cache

34
scripts/gpu-dev-deploy.sh Executable file
View File

@@ -0,0 +1,34 @@
#!/bin/bash
# Quick deploy to GPU node with hot reload
# Usage: ./scripts/gpu-dev-deploy.sh
#
# Syncs the working tree to the GPU host and bounces the worker container.
# Assumes the container runs under a restart policy (swarm service or
# restart:always) -- `docker kill` alone does not start a replacement.
# TODO(review): confirm the restart policy on the target node.
set -e

# Target host/dir; override GPU_HOST in the environment for other nodes.
GPU_HOST="${GPU_HOST:-root@138.197.163.123}"
REMOTE_DIR="/root/art-dag/celery"

echo "=== GPU Dev Deploy ==="
echo "Syncing code to $GPU_HOST..."

# Sync code (excluding cache, git, __pycache__).
# --delete removes remote files no longer present locally; .env is excluded
# so the remote environment file survives the sync.
rsync -avz --delete \
  --exclude '.git' \
  --exclude '__pycache__' \
  --exclude '*.pyc' \
  --exclude '.pytest_cache' \
  --exclude 'node_modules' \
  --exclude '.env' \
  ./ "$GPU_HOST:$REMOTE_DIR/"

echo "Restarting GPU worker..."
# Kill the current container; tolerate the case where none is running.
ssh "$GPU_HOST" "docker kill \$(docker ps -q -f name=l1-gpu-worker) 2>/dev/null || true"

echo "Waiting for new container..."
# Give the restart policy time to spin up a replacement before tailing logs.
sleep 10

# Show new container logs
ssh "$GPU_HOST" "docker logs --tail 30 \$(docker ps -q -f name=l1-gpu-worker)"

echo ""
echo "=== Deploy Complete ==="
# \$ is escaped so the command substitution runs on the remote host.
echo "Use 'ssh $GPU_HOST docker logs -f \$(docker ps -q -f name=l1-gpu-worker)' to follow logs"

View File

@@ -18,7 +18,7 @@ except ImportError:
print("[blending_gpu] CuPy not available, using CPU fallback") print("[blending_gpu] CuPy not available, using CPU fallback")
# GPU persistence mode - keep frames on GPU between operations # GPU persistence mode - keep frames on GPU between operations
GPU_PERSIST = os.environ.get("STREAMING_GPU_PERSIST", "1") == "1" GPU_PERSIST = os.environ.get("STREAMING_GPU_PERSIST", "0") == "1"
if GPU_AVAILABLE and GPU_PERSIST: if GPU_AVAILABLE and GPU_PERSIST:
print("[blending_gpu] GPU persistence enabled - frames stay on GPU") print("[blending_gpu] GPU persistence enabled - frames stay on GPU")

View File

@@ -21,7 +21,7 @@ except ImportError:
print("[color_ops_gpu] CuPy not available, using CPU fallback") print("[color_ops_gpu] CuPy not available, using CPU fallback")
# GPU persistence mode - keep frames on GPU between operations # GPU persistence mode - keep frames on GPU between operations
GPU_PERSIST = os.environ.get("STREAMING_GPU_PERSIST", "1") == "1" GPU_PERSIST = os.environ.get("STREAMING_GPU_PERSIST", "0") == "1"
if GPU_AVAILABLE and GPU_PERSIST: if GPU_AVAILABLE and GPU_PERSIST:
print("[color_ops_gpu] GPU persistence enabled - frames stay on GPU") print("[color_ops_gpu] GPU persistence enabled - frames stay on GPU")

View File

@@ -25,7 +25,7 @@ except ImportError:
# GPU persistence mode - keep frames on GPU between operations # GPU persistence mode - keep frames on GPU between operations
# Set STREAMING_GPU_PERSIST=1 for maximum performance # Set STREAMING_GPU_PERSIST=1 for maximum performance
GPU_PERSIST = os.environ.get("STREAMING_GPU_PERSIST", "1") == "1" GPU_PERSIST = os.environ.get("STREAMING_GPU_PERSIST", "0") == "1"
if GPU_AVAILABLE and GPU_PERSIST: if GPU_AVAILABLE and GPU_PERSIST:
print("[geometry_gpu] GPU persistence enabled - frames stay on GPU") print("[geometry_gpu] GPU persistence enabled - frames stay on GPU")

View File

@@ -0,0 +1,185 @@
"""
Integration tests for GPU/CPU frame compatibility.
These tests verify that all primitives work correctly with both:
- numpy arrays (CPU frames)
- CuPy arrays (GPU frames)
- GPUFrame wrapper objects
Run with: pytest tests/test_frame_compatibility.py -v
"""
import pytest
import numpy as np
import sys
import os
# Add parent to path so `sexp_effects` / `streaming` resolve when the test
# file is run from the repo root or from inside tests/.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Try to import CuPy; fall back to CPU-only mode when it is unavailable
# (e.g. CI runners without a GPU).  HAS_GPU gates the GPU-only test cases.
try:
    import cupy as cp
    HAS_GPU = True
except ImportError:
    cp = None
    HAS_GPU = False
def create_test_frame(on_gpu=False):
    """Build a random 100x100 RGB uint8 frame, optionally uploaded to GPU."""
    pixels = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
    # Short-circuit: HAS_GPU is only consulted when a GPU frame is requested.
    if not (on_gpu and HAS_GPU):
        return pixels
    return cp.asarray(pixels)
class MockGPUFrame:
    """Minimal stand-in for GPUFrame: wraps an array, exposes .cpu / .gpu."""

    def __init__(self, data):
        self._data = data

    @property
    def cpu(self):
        """Return the wrapped array as numpy, downloading from GPU if needed."""
        data = self._data
        if HAS_GPU and hasattr(data, 'get'):
            return data.get()
        return data

    @property
    def gpu(self):
        """Return the wrapped array as a CuPy array; raise without a GPU."""
        if not HAS_GPU:
            raise RuntimeError("No GPU available")
        data = self._data
        return data if hasattr(data, 'get') else cp.asarray(data)
class TestColorOps:
    """Exercise color_ops primitives across the supported frame types."""

    def test_shift_hsv_numpy(self):
        """shift-hsv should work with numpy arrays."""
        from sexp_effects.primitive_libs.color_ops import prim_shift_hsv
        img = create_test_frame(on_gpu=False)
        out = prim_shift_hsv(img, h=30, s=1.2, v=0.9)
        assert isinstance(out, np.ndarray)
        assert out.shape == img.shape

    @pytest.mark.skipif(not HAS_GPU, reason="No GPU")
    def test_shift_hsv_cupy(self):
        """shift-hsv should work with CuPy arrays."""
        from sexp_effects.primitive_libs.color_ops import prim_shift_hsv
        img = create_test_frame(on_gpu=True)
        out = prim_shift_hsv(img, h=30, s=1.2, v=0.9)
        # GPU input must still come back as a host-side numpy array.
        assert isinstance(out, np.ndarray)

    def test_shift_hsv_gpuframe(self):
        """shift-hsv should work with GPUFrame objects."""
        from sexp_effects.primitive_libs.color_ops import prim_shift_hsv
        wrapped = MockGPUFrame(create_test_frame(on_gpu=False))
        out = prim_shift_hsv(wrapped, h=30, s=1.2, v=0.9)
        assert isinstance(out, np.ndarray)

    def test_invert_numpy(self):
        """invert-img should work with numpy arrays."""
        from sexp_effects.primitive_libs.color_ops import prim_invert_img
        img = create_test_frame(on_gpu=False)
        assert isinstance(prim_invert_img(img), np.ndarray)

    def test_adjust_numpy(self):
        """adjust should work with numpy arrays."""
        from sexp_effects.primitive_libs.color_ops import prim_adjust
        img = create_test_frame(on_gpu=False)
        assert isinstance(prim_adjust(img, brightness=10, contrast=1.2), np.ndarray)
class TestGeometry:
    """Exercise geometry primitives on CPU frames."""

    def test_rotate_numpy(self):
        """rotate should work with numpy arrays."""
        from sexp_effects.primitive_libs.geometry import prim_rotate
        img = create_test_frame(on_gpu=False)
        assert isinstance(prim_rotate(img, 45), np.ndarray)

    def test_scale_numpy(self):
        """scale should work with numpy arrays."""
        from sexp_effects.primitive_libs.geometry import prim_scale
        img = create_test_frame(on_gpu=False)
        assert isinstance(prim_scale(img, 1.5), np.ndarray)
class TestBlending:
    """Exercise blending primitives on CPU frames."""

    def test_blend_numpy(self):
        """blend should work with numpy arrays."""
        from sexp_effects.primitive_libs.blending import prim_blend
        first = create_test_frame(on_gpu=False)
        second = create_test_frame(on_gpu=False)
        blended = prim_blend(first, second, 0.5)
        assert isinstance(blended, np.ndarray)
class TestInterpreterConversion:
    """Test the interpreter's frame conversion (_maybe_to_numpy)."""

    @staticmethod
    def _make_interp():
        """Build a StreamInterpreter from a throwaway minimal .sexp plan.

        The original tests used NamedTemporaryFile(delete=False) and never
        removed the file, leaking one temp file per test per run, and
        duplicated this fixture in all four tests.  Here the plan file is
        unlinked as soon as the interpreter is constructed (assumes the
        interpreter reads the plan eagerly in __init__ -- TODO confirm).
        """
        import tempfile
        from streaming.stream_sexp_generic import StreamInterpreter
        fd, path = tempfile.mkstemp(suffix='.sexp')
        try:
            with os.fdopen(fd, 'w') as f:
                f.write('(stream "test" :fps 30 :width 100 :height 100 (frame frame))')
            return StreamInterpreter(path)
        finally:
            os.unlink(path)

    def test_maybe_to_numpy_none(self):
        """_maybe_to_numpy should handle None."""
        interp = self._make_interp()
        assert interp._maybe_to_numpy(None) is None

    def test_maybe_to_numpy_numpy(self):
        """_maybe_to_numpy should pass through numpy arrays."""
        interp = self._make_interp()
        frame = create_test_frame(on_gpu=False)
        # Identity, not just equality: no copy should be made for numpy input.
        assert interp._maybe_to_numpy(frame) is frame

    @pytest.mark.skipif(not HAS_GPU, reason="No GPU")
    def test_maybe_to_numpy_cupy(self):
        """_maybe_to_numpy should convert CuPy to numpy."""
        interp = self._make_interp()
        frame = create_test_frame(on_gpu=True)
        assert isinstance(interp._maybe_to_numpy(frame), np.ndarray)

    def test_maybe_to_numpy_gpuframe(self):
        """_maybe_to_numpy should convert GPUFrame to numpy."""
        interp = self._make_interp()
        frame = MockGPUFrame(create_test_frame(on_gpu=False))
        assert isinstance(interp._maybe_to_numpy(frame), np.ndarray)
# Allow running this file directly (outside a pytest invocation).
if __name__ == '__main__':
    pytest.main([__file__, '-v'])