Add dev infrastructure improvements
- Central config with logging on startup
- Hot reload support for GPU worker (docker-compose.gpu-dev.yml)
- Quick deploy script (scripts/gpu-dev-deploy.sh)
- GPU/CPU frame compatibility tests
- CI/CD pipeline for GPU worker (.gitea/workflows/gpu-worker.yml)
- Standardize GPU_PERSIST default to 0 across all modules

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
74
.gitea/workflows/gpu-worker.yml
Normal file
74
.gitea/workflows/gpu-worker.yml
Normal file
@@ -0,0 +1,74 @@
|
||||
# CI/CD pipeline for the GPU worker.
#
# On pushes to main that touch the worker's sources, Dockerfile.gpu or
# requirements.txt, this workflow:
#   1. runs the frame compatibility tests on a CPU-only runner, and
#   2. if they pass, syncs the checkout to the GPU node over SSH, rebuilds the
#      worker image there and kills the running worker container.
#
# Required secrets:
#   GPU_HOST     - ssh/rsync destination (the part after any "user@" is used
#                  as the host name for ssh-keyscan)
#   GPU_SSH_KEY  - private key written to ~/.ssh/id_rsa on the runner
name: GPU Worker CI/CD

on:
  push:
    branches: [main]
    paths:
      - 'sexp_effects/**'
      - 'streaming/**'
      - 'tasks/**'
      - 'Dockerfile.gpu'
      - 'requirements.txt'

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install pytest

      - name: Run frame compatibility tests
        run: |
          # Note: GPU tests skipped on CI (no GPU), but CPU tests must pass.
          # The pytest exit status is deliberately NOT masked: a failing test
          # must fail this job so that `deploy` (needs: test) does not run.
          pytest tests/test_frame_compatibility.py -v --ignore-glob='*gpu*'

  deploy:
    needs: test
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    steps:
      - uses: actions/checkout@v3

      - name: Deploy to GPU node
        env:
          GPU_HOST: ${{ secrets.GPU_HOST }}
          SSH_KEY: ${{ secrets.GPU_SSH_KEY }}
        run: |
          # Set up SSH
          mkdir -p ~/.ssh
          echo "$SSH_KEY" > ~/.ssh/id_rsa
          chmod 600 ~/.ssh/id_rsa
          # Strip any leading "user@" so only the host name is scanned.
          # Best effort: if the scan fails, the rsync/ssh calls below fail on
          # host key verification instead.
          ssh-keyscan -H "${GPU_HOST#*@}" >> ~/.ssh/known_hosts 2>/dev/null || true

          # Sync code
          rsync -avz --delete \
            --exclude '.git' \
            --exclude '__pycache__' \
            --exclude '*.pyc' \
            --exclude '.pytest_cache' \
            ./ "$GPU_HOST:/root/art-dag/celery/"

          # Build and restart
          # `set -e` makes the remote script stop at the first failing command,
          # so a broken `docker build` no longer kills the running worker and
          # no longer reports success through the trailing `echo`.
          # NOTE(review): `docker kill` only stops the container; the restart
          # is presumably done by a restart policy or orchestrator on the GPU
          # node - confirm.
          ssh "$GPU_HOST" "
            set -e
            cd /root/art-dag/celery
            docker build -t git.rose-ash.com/art-dag/l1-gpu-server:latest -f Dockerfile.gpu .
            docker kill \$(docker ps -q -f name=l1-gpu-worker) 2>/dev/null || true
            echo 'GPU worker restarted'
          "

      - name: Verify deployment
        env:
          GPU_HOST: ${{ secrets.GPU_HOST }}
          SSH_KEY: ${{ secrets.GPU_SSH_KEY }}
        run: |
          # Reuses the SSH key and known_hosts entry written by the previous
          # step of this job. Wait before reading the worker's logs.
          sleep 15
          ssh "$GPU_HOST" "docker logs --tail 20 \$(docker ps -q -f name=l1-gpu-worker)"
|
||||
Reference in New Issue
Block a user