- Central config with logging on startup - Hot reload support for GPU worker (docker-compose.gpu-dev.yml) - Quick deploy script (scripts/gpu-dev-deploy.sh) - GPU/CPU frame compatibility tests - CI/CD pipeline for GPU worker (.gitea/workflows/gpu-worker.yml) - Standardize GPU_PERSIST default to 0 across all modules Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
75 lines
2.0 KiB
YAML
75 lines
2.0 KiB
YAML
# Workflow identity and trigger for the GPU worker pipeline.
name: GPU Worker CI/CD

# Runs on pushes to main, and only when the push touches one of the paths
# listed below.
# NOTE(review): neither tests/** nor this workflow file is listed, so a push
# that changes only those does not start the pipeline - confirm that is
# intended.
on:
  push:
    branches:
      - main
    paths:
      - 'sexp_effects/**'
      - 'streaming/**'
      - 'tasks/**'
      - Dockerfile.gpu
      - requirements.txt

# Two jobs: `test` runs the frame compatibility tests, and `deploy` (which
# needs `test`) syncs the checkout to the GPU host, rebuilds the image and
# kills the running worker container.
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install pytest

      # GPU tests are skipped on CI (no GPU), but CPU tests must pass.
      # The command is deliberately NOT followed by `|| true`: a failing
      # test has to fail this job so that `deploy` (needs: test) is blocked.
      - name: Run frame compatibility tests
        run: |
          pytest tests/test_frame_compatibility.py -v --ignore-glob='*gpu*'

  deploy:
    needs: test
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    steps:
      - uses: actions/checkout@v3

      - name: Deploy to GPU node
        env:
          GPU_HOST: ${{ secrets.GPU_HOST }}
          SSH_KEY: ${{ secrets.GPU_SSH_KEY }}
        run: |
          # Set up SSH. The key file is created under a restrictive umask so
          # it is never readable by others, not even before the chmod.
          mkdir -p ~/.ssh
          chmod 700 ~/.ssh
          ( umask 077; echo "$SSH_KEY" > ~/.ssh/id_rsa )
          chmod 600 ~/.ssh/id_rsa
          # Strip an optional "user@" prefix to get the host name to scan.
          # Best-effort: scan errors are silenced and ignored.
          ssh-keyscan -H "${GPU_HOST#*@}" >> ~/.ssh/known_hosts 2>/dev/null || true

          # Sync code (--delete removes remote files that are gone locally)
          rsync -avz --delete \
            --exclude '.git' \
            --exclude '__pycache__' \
            --exclude '*.pyc' \
            --exclude '.pytest_cache' \
            ./ "$GPU_HOST:/root/art-dag/celery/"

          # Build and restart.
          # `set -e` aborts the remote script on the first failing command,
          # so a failed `cd` or `docker build` no longer kills the running
          # worker or reports success.
          # NOTE(review): the script only kills the container; nothing here
          # starts a new one. Presumably a restart policy or orchestrator on
          # the host brings it back up - confirm.
          ssh "$GPU_HOST" "
            set -e
            cd /root/art-dag/celery
            docker build -t git.rose-ash.com/art-dag/l1-gpu-server:latest -f Dockerfile.gpu .
            docker kill \$(docker ps -q -f name=l1-gpu-worker) 2>/dev/null || true
            echo 'GPU worker restarted'
          "

      # Reuses the SSH key and known_hosts written by the previous step of
      # this job, so the key secret is not exported to this step again.
      - name: Verify deployment
        env:
          GPU_HOST: ${{ secrets.GPU_HOST }}
        run: |
          # Wait before looking for the worker container.
          sleep 15
          ssh "$GPU_HOST" "docker logs --tail 20 \$(docker ps -q -f name=l1-gpu-worker)"