Files
celery/docker-compose.yml
giles 589ea77b98
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 1m47s
Add healthcheck and start-first update for l1-server
Zero-downtime deploys: new container starts and passes health
check before the old one is stopped. Caddy always has a healthy
backend to proxy to.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 22:19:07 +00:00

188 lines
4.6 KiB
YAML

version: "3.8"

services:
  # Celery broker / result backend (DB 5 per the REDIS_URL used below).
  redis:
    image: redis:7-alpine
    ports:
      # Long syntax with host mode: bind directly on the node,
      # bypassing the swarm routing mesh.
      - target: 6379
        published: 16379
        mode: host  # Bypass swarm routing mesh
    volumes:
      - redis_data:/data
    networks:
      - celery
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          # Keep infrastructure services off the GPU node.
          - node.labels.gpu != true
postgres:
image: postgres:16-alpine
env_file:
- .env
environment:
- POSTGRES_USER=artdag
- POSTGRES_DB=artdag
ports:
- target: 5432
published: 15432
mode: host # Expose for GPU worker on different VPC
volumes:
- postgres_data:/var/lib/postgresql/data
networks:
- celery
deploy:
replicas: 1
restart_policy:
condition: on-failure
placement:
constraints:
- node.labels.gpu != true
ipfs:
image: ipfs/kubo:latest
ports:
- "4001:4001" # Swarm TCP
- "4001:4001/udp" # Swarm UDP
- target: 5001
published: 15001
mode: host # API port for GPU worker on different VPC
volumes:
- ipfs_data:/data/ipfs
- l1_cache:/data/cache:ro # Read-only access to cache for adding files
networks:
- celery
- externalnet # For gateway access
deploy:
replicas: 1
restart_policy:
condition: on-failure
placement:
constraints:
- node.labels.gpu != true
l1-server:
image: registry.rose-ash.com:5000/celery-l1-server:latest
env_file:
- .env
environment:
- REDIS_URL=redis://redis:6379/5
# IPFS_API multiaddr - used for all IPFS operations (add, cat, pin)
- IPFS_API=/dns/ipfs/tcp/5001
- CACHE_DIR=/data/cache
# DATABASE_URL, ADMIN_TOKEN, ARTDAG_CLUSTER_KEY,
# L2_SERVER, L2_DOMAIN, IPFS_GATEWAY_URL from .env file
healthcheck:
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8100/')"]
interval: 10s
timeout: 5s
retries: 3
start_period: 15s
volumes:
- l1_cache:/data/cache
depends_on:
- redis
- postgres
- ipfs
networks:
- celery
- externalnet
deploy:
replicas: 1
update_config:
order: start-first
restart_policy:
condition: on-failure
placement:
constraints:
- node.labels.gpu != true
l1-worker:
image: registry.rose-ash.com:5000/celery-l1-server:latest
command: sh -c "find /app -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null; celery -A celery_app worker --loglevel=info -E"
env_file:
- .env
environment:
- REDIS_URL=redis://redis:6379/5
# IPFS_API multiaddr - used for all IPFS operations (add, cat, pin)
- IPFS_API=/dns/ipfs/tcp/5001
- CACHE_DIR=/data/cache
- C_FORCE_ROOT=true
# DATABASE_URL, ARTDAG_CLUSTER_KEY from .env file
volumes:
- l1_cache:/data/cache
depends_on:
- redis
- postgres
- ipfs
networks:
- celery
deploy:
replicas: 2
restart_policy:
condition: on-failure
placement:
constraints:
- node.labels.gpu != true
flower:
image: mher/flower:2.0
command: celery --broker=redis://redis:6379/5 flower --port=5555
environment:
- CELERY_BROKER_URL=redis://redis:6379/5
- FLOWER_PORT=5555
depends_on:
- redis
networks:
- celery
- externalnet
deploy:
replicas: 1
restart_policy:
condition: on-failure
placement:
constraints:
- node.labels.gpu != true
# GPU worker for streaming/rendering tasks
# Build: docker build -f Dockerfile.gpu -t registry.rose-ash.com:5000/celery-l1-gpu-server:latest .
# Requires: docker node update --label-add gpu=true <gpu-node-name>
l1-gpu-worker:
image: registry.rose-ash.com:5000/celery-l1-gpu-server:latest
command: sh -c "cd /app && celery -A celery_app worker --loglevel=info -E -Q gpu,celery"
env_file:
- .env.gpu
volumes:
# Local cache - ephemeral, just for working files
- gpu_cache:/data/cache
# Note: No source mount - GPU worker uses code from image
depends_on:
- redis
- postgres
- ipfs
networks:
- celery
deploy:
replicas: 1
restart_policy:
condition: on-failure
placement:
constraints:
- node.labels.gpu == true
volumes:
  redis_data:
  postgres_data:
  ipfs_data:
  l1_cache:
  gpu_cache:  # Ephemeral cache for GPU workers

networks:
  celery:
    driver: overlay  # Swarm-scoped network spanning all service replicas
  externalnet:
    external: true  # Pre-existing network shared with the reverse proxy