Simplify CI: remove GPU workflow, use registry images
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 2m32s

- Remove gpu-worker.yml (no GPU server)
- Fix ci.yml: install ssh/rsync in job container, remove GPU steps
- Remove source mounts from l1-server and l1-worker so they use image code

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
giles
2026-02-06 11:10:59 +00:00
parent 48018d09b7
commit 88ded8c927
3 changed files with 6 additions and 110 deletions

View File

@@ -7,7 +7,6 @@ on:
env:
REGISTRY: registry.rose-ash.com:5000
IMAGE_CPU: celery-l1-server
IMAGE_GPU: celery-l1-gpu-server
jobs:
build-and-deploy:
@@ -15,6 +14,10 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install tools
run: |
apt-get update && apt-get install -y --no-install-recommends openssh-client rsync
- name: Set up SSH
env:
SSH_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
@@ -35,9 +38,10 @@ jobs:
--exclude '*.pyc' \
--exclude '.pytest_cache' \
--exclude 'venv' \
--exclude '.env' \
./ "root@$DEPLOY_HOST:/root/art-dag/celery/"
- name: Build and push CPU image
- name: Build and push image
env:
DEPLOY_HOST: ${{ secrets.DEPLOY_HOST }}
run: |
@@ -48,17 +52,6 @@ jobs:
docker push ${{ env.REGISTRY }}/${{ env.IMAGE_CPU }}:${{ github.sha }}
"
- name: Build and push GPU image
env:
DEPLOY_HOST: ${{ secrets.DEPLOY_HOST }}
run: |
ssh "root@$DEPLOY_HOST" "
cd /root/art-dag/celery
docker build --build-arg CACHEBUST=\$(date +%s) -f Dockerfile.gpu -t ${{ env.REGISTRY }}/${{ env.IMAGE_GPU }}:latest -t ${{ env.REGISTRY }}/${{ env.IMAGE_GPU }}:${{ github.sha }} .
docker push ${{ env.REGISTRY }}/${{ env.IMAGE_GPU }}:latest
docker push ${{ env.REGISTRY }}/${{ env.IMAGE_GPU }}:${{ github.sha }}
"
- name: Deploy stack
env:
DEPLOY_HOST: ${{ secrets.DEPLOY_HOST }}
@@ -70,22 +63,3 @@ jobs:
sleep 10
docker stack services celery
"
- name: Deploy GPU worker
env:
GPU_HOST: ${{ secrets.GPU_HOST }}
SSH_KEY: ${{ secrets.GPU_SSH_KEY }}
if: ${{ env.GPU_HOST != '' }}
run: |
# Set up GPU SSH if different host
if [ -n "$SSH_KEY" ]; then
echo "$SSH_KEY" > ~/.ssh/gpu_key
chmod 600 ~/.ssh/gpu_key
ssh-keyscan -H "${GPU_HOST#*@}" >> ~/.ssh/known_hosts 2>/dev/null || true
ssh -i ~/.ssh/gpu_key "$GPU_HOST" "
docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_GPU }}:latest
docker stack deploy -c /root/art-dag/celery/docker-compose.yml celery || \
docker service update --image ${{ env.REGISTRY }}/${{ env.IMAGE_GPU }}:latest celery_l1-gpu-worker
"
fi

View File

@@ -1,74 +0,0 @@
# GPU worker pipeline.
#
# Triggered by pushes to main that touch the GPU-relevant paths listed below.
#   test   - installs requirements.txt + pytest and runs the frame
#            compatibility tests (GPU-named tests are ignored).
#   deploy - rsyncs the checkout to the GPU host over SSH, rebuilds the image
#            from Dockerfile.gpu on that host, and kills the running
#            l1-gpu-worker container.
name: GPU Worker CI/CD

on:
  push:
    branches: [main]
    paths:
      - 'sexp_effects/**'
      - 'streaming/**'
      - 'tasks/**'
      - 'Dockerfile.gpu'
      - 'requirements.txt'

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install pytest

      - name: Run frame compatibility tests
        # Note: GPU tests skipped on CI (no GPU), but CPU tests must pass.
        # The pytest exit status is deliberately NOT masked with `|| true`:
        # a failing test must fail this job so that `deploy` (needs: test)
        # does not run.
        run: |
          pytest tests/test_frame_compatibility.py -v --ignore-glob='*gpu*'

  deploy:
    needs: test
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    steps:
      - uses: actions/checkout@v4

      - name: Deploy to GPU node
        env:
          GPU_HOST: ${{ secrets.GPU_HOST }}
          SSH_KEY: ${{ secrets.GPU_SSH_KEY }}
        run: |
          # Set up SSH
          mkdir -p ~/.ssh
          echo "$SSH_KEY" > ~/.ssh/id_rsa
          chmod 600 ~/.ssh/id_rsa
          # Strip any leading "user@" so only the host name is scanned.
          # Best-effort: a failed scan is tolerated here.
          ssh-keyscan -H "${GPU_HOST#*@}" >> ~/.ssh/known_hosts 2>/dev/null || true

          # Sync code
          rsync -avz --delete \
            --exclude '.git' \
            --exclude '__pycache__' \
            --exclude '*.pyc' \
            --exclude '.pytest_cache' \
            ./ "$GPU_HOST:/root/art-dag/celery/"

          # Build and restart.
          # `set -e` makes a failed cd/build abort the remote script with a
          # non-zero status, so this step fails instead of killing the worker
          # and reporting success after a broken build.
          # NOTE(review): the container is only killed here; presumably a
          # restart policy or orchestrator brings it back up - confirm.
          ssh "$GPU_HOST" "
            set -e
            cd /root/art-dag/celery
            docker build -t git.rose-ash.com/art-dag/l1-gpu-server:latest -f Dockerfile.gpu .
            docker kill \$(docker ps -q -f name=l1-gpu-worker) 2>/dev/null || true
            echo 'GPU worker restarted'
          "

      - name: Verify deployment
        env:
          GPU_HOST: ${{ secrets.GPU_HOST }}
          SSH_KEY: ${{ secrets.GPU_SSH_KEY }}
        run: |
          # Wait before reading the tail of the worker container's log.
          sleep 15
          ssh "$GPU_HOST" "docker logs --tail 20 \$(docker ps -q -f name=l1-gpu-worker)"

View File

@@ -82,8 +82,6 @@ services:
# L2_SERVER, L2_DOMAIN, IPFS_GATEWAY_URL from .env file
volumes:
- l1_cache:/data/cache
# Mount source code for development - restart service to pick up changes
- .:/app
depends_on:
- redis
- postgres
@@ -113,8 +111,6 @@ services:
- ARTDAG_CLUSTER_KEY=${ARTDAG_CLUSTER_KEY:-}
volumes:
- l1_cache:/data/cache
# Mount source code for development - restart service to pick up changes
- .:/app
depends_on:
- redis
- postgres