celery/docker-compose.yml
giles 86830019ad Add IPFS HLS streaming and GPU optimizations
- Add IPFSHLSOutput class that uploads segments to IPFS as they're created
- Update streaming task to use IPFS HLS output for distributed streaming
- Add /ipfs-stream endpoint to get IPFS playlist URL
- Update /stream endpoint to redirect to IPFS when available
- Add GPU persistence mode (STREAMING_GPU_PERSIST=1) to keep frames on GPU
- Add hardware video decoding (NVDEC) support for faster video processing
- Add GPU-accelerated primitive libraries: blending_gpu, color_ops_gpu, geometry_gpu
- Add streaming_gpu module with GPUFrame class for tracking CPU/GPU data location
- Add Dockerfile.gpu for building GPU-enabled worker image

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 20:23:16 +00:00


version: "3.8"
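# Swarm stack for the art-dag Celery pipeline. The deploy: keys and host-mode
# published ports below assume Docker Swarm; a typical deploy command (the
# stack name "artdag" is illustrative, not fixed by this file) would be:
#   docker stack deploy -c docker-compose.yml artdag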
services:
  redis:
    image: redis:7-alpine
    ports:
      - target: 6379
        published: 16379
        mode: host # Bypass swarm routing mesh
    volumes:
      - redis_data:/data
    networks:
      - celery
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.labels.gpu != true
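  # redis, postgres and ipfs publish host-mode ports so the GPU worker on a
  # different VPC can reach them; a quick reachability check from that node
  # (IP as used in l1-gpu-worker below): redis-cli -h 138.68.142.139 -p 16379 ping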
  postgres:
    image: postgres:16-alpine
    environment:
      - POSTGRES_USER=artdag
      - POSTGRES_PASSWORD=artdag
      - POSTGRES_DB=artdag
    ports:
      - target: 5432
        published: 15432
        mode: host # Expose for GPU worker on different VPC
    volumes:
      - postgres_data:/var/lib/postgresql/data
    networks:
      - celery
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.labels.gpu != true
  ipfs:
    image: ipfs/kubo:latest
    ports:
      - "4001:4001"     # Swarm TCP
      - "4001:4001/udp" # Swarm UDP
      - target: 5001
        published: 15001
        mode: host # API port for GPU worker on different VPC
    volumes:
      - ipfs_data:/data/ipfs
      - l1_cache:/data/cache:ro # Read-only access to cache for adding files
    networks:
      - celery
      - externalnet # For gateway access
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.labels.gpu != true
  l1-server:
    image: git.rose-ash.com/art-dag/l1-server:latest
    env_file:
      - .env
    environment:
      - REDIS_URL=redis://redis:6379/5
      - DATABASE_URL=postgresql://artdag:artdag@postgres:5432/artdag
      - ADMIN_TOKEN=artdag-admin-purge-token-2026
      # IPFS_API multiaddr - used for all IPFS operations (add, cat, pin)
      - IPFS_API=/dns/ipfs/tcp/5001
      - CACHE_DIR=/data/cache
      # Set IPFS_PRIMARY=true to use IPFS-primary mode (everything on IPFS, no local cache)
      # - IPFS_PRIMARY=true
      # Cluster key for trust domains - systems with the same key can share work via IPFS
      # Generate with: openssl rand -hex 32
      - ARTDAG_CLUSTER_KEY=${ARTDAG_CLUSTER_KEY:-}
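      # Example (shell, run once before deploying so the substitution above picks it up):
      #   export ARTDAG_CLUSTER_KEY=$(openssl rand -hex 32)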
      # L2_SERVER, L2_DOMAIN, IPFS_GATEWAY_URL from .env file
    volumes:
      - l1_cache:/data/cache
      # Mount source code for development - restart service to pick up changes
      - .:/app
    depends_on:
      - redis
      - postgres
      - ipfs
    networks:
      - celery
      - externalnet
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.labels.gpu != true
  l1-worker:
    image: git.rose-ash.com/art-dag/l1-server:latest
    command: sh -c "find /app -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null; celery -A celery_app worker --loglevel=info -E"
    environment:
      - REDIS_URL=redis://redis:6379/5
      - DATABASE_URL=postgresql://artdag:artdag@postgres:5432/artdag
      # IPFS_API multiaddr - used for all IPFS operations (add, cat, pin)
      - IPFS_API=/dns/ipfs/tcp/5001
      - CACHE_DIR=/data/cache
      - C_FORCE_ROOT=true
      # Must match l1-server for consistent cache_ids
      - ARTDAG_CLUSTER_KEY=${ARTDAG_CLUSTER_KEY:-}
    volumes:
      - l1_cache:/data/cache
      # Mount source code for development - restart service to pick up changes
      - .:/app
    depends_on:
      - redis
      - postgres
      - ipfs
    networks:
      - celery
    deploy:
      replicas: 2
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.labels.gpu != true
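  # Worker replicas can be rescaled at runtime, e.g. (stack name "artdag" illustrative):
  #   docker service scale artdag_l1-worker=4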
  flower:
    image: mher/flower:2.0
    command: celery --broker=redis://redis:6379/5 flower --port=5555
    environment:
      - CELERY_BROKER_URL=redis://redis:6379/5
      - FLOWER_PORT=5555
    depends_on:
      - redis
    networks:
      - celery
      - externalnet
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.labels.gpu != true
  # GPU worker for streaming/rendering tasks
  # Build: docker build -f Dockerfile.gpu -t git.rose-ash.com/art-dag/l1-gpu-server:latest .
  # Requires: docker node update --label-add gpu=true <gpu-node-name>
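  # Note: docker stack deploy ignores the Compose gpus:/device_requests keys, so
  # this service assumes the nvidia container runtime is the default runtime on
  # the GPU node (e.g. "default-runtime": "nvidia" in its /etc/docker/daemon.json),
  # which is what makes NVIDIA_VISIBLE_DEVICES below take effect.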
  l1-gpu-worker:
    image: git.rose-ash.com/art-dag/l1-gpu-server:latest
    # For local dev, uncomment to build from Dockerfile.gpu:
    # build:
    #   context: .
    #   dockerfile: Dockerfile.gpu
    command: sh -c "cd /app && celery -A celery_app worker --loglevel=info -E -Q gpu,celery"
    environment:
      # The GPU node is on a different VPC - use public IPs for cross-node communication
      - REDIS_URL=redis://138.68.142.139:16379/5
      - DATABASE_URL=postgresql://artdag:artdag@138.68.142.139:15432/artdag
      # Connect to the shared IPFS node on the CPU host (via public IP)
      - IPFS_API=/ip4/138.68.142.139/tcp/15001
      # Gateway fallback for resilience
      - IPFS_GATEWAYS=https://ipfs.io,https://cloudflare-ipfs.com,https://dweb.link
      # Local cache is ephemeral (tmpfs or local volume)
      - CACHE_DIR=/data/cache
      - C_FORCE_ROOT=true
      - ARTDAG_CLUSTER_KEY=${ARTDAG_CLUSTER_KEY:-}
      # GPU acceleration settings
      - NVIDIA_VISIBLE_DEVICES=all
      # Keep frames on the GPU between operations for maximum performance
      - STREAMING_GPU_PERSIST=1
    volumes:
      # Local cache - ephemeral, just for working files
      - gpu_cache:/data/cache
      # Note: no source mount - the GPU worker uses code from the image
    depends_on:
      - redis
      - postgres
      - ipfs
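      # depends_on is documentation only here: docker stack deploy ignores it,
      # and this worker reaches these services via the public IPs above anyway.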
    networks:
      - celery
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.labels.gpu == true

volumes:
  redis_data:
  postgres_data:
  ipfs_data:
  l1_cache:
  gpu_cache: # Ephemeral cache for GPU workers

networks:
  celery:
    driver: overlay
  externalnet:
    external: true
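    # externalnet must exist before the stack is deployed, e.g.:
    #   docker network create --driver overlay --attachable externalnet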