# oO/infra/docker/docker-compose.yml

# Compose project name.
name: oo

services:
  # ── core profile ──────────────────────────────────────────────────────────

  # `api` service: built from Dockerfile.api, started under the `core` and
  # `full` profiles, and published on the host loopback interface only.
  api:
    build:
      context: ../..
      dockerfile: infra/docker/Dockerfile.api
    profiles: [core, full]
    env_file: ../../.env.local
    environment:
      NODE_ENV: production
      # `ml-serving` is only started under the `full` profile and `mlflow`
      # only under `mlops`; under other profiles these hosts are not running.
      ML_SERVING_URL: "http://ml-serving:8000"
      MLFLOW_URL: "http://mlflow:5000"
      # NOTE(review): values under `environment` take precedence over
      # `env_file`, and `${...}` is interpolated from the shell / project
      # `.env`, not from `env_file`. If the token is defined only in
      # `.env.local`, this line would replace it with an empty string —
      # confirm how compose is invoked.
      INTERNAL_API_TOKEN: "${INTERNAL_API_TOKEN:-}"
    volumes:
      # Same path on host and in container.
      - /mnt/ssd/dbs/oo:/mnt/ssd/dbs/oo
    ports:
      - "127.0.0.1:3078:3078"
    healthcheck:
      # Probe 127.0.0.1 rather than `localhost`: on Node versions without
      # connection-family auto-selection, `localhost` may resolve to ::1 first
      # and the probe fails if the server only listens on IPv4.
      test: ["CMD", "node", "-e", "fetch('http://127.0.0.1:3078/health').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"]
      interval: 10s
      timeout: 5s
      retries: 5

  # `web` service: built from Dockerfile.web, started under the `core` and
  # `full` profiles once `api` reports healthy. Published on 127.0.0.1:3079.
  web:
    profiles:
      - core
      - full
    build:
      dockerfile: infra/docker/Dockerfile.web
      context: ../..
    depends_on:
      api:
        condition: service_healthy
    env_file: ../../.env.local
    environment:
      NODE_ENV: production
      HOSTNAME: '0.0.0.0'
      PORT: '3079'
      # Left empty: Caddy routes /api/* directly to the API in prod.
      NEXT_PUBLIC_API_URL: ''
    ports:
      - '127.0.0.1:3079:3079'

  # `admin` service: built from Dockerfile.admin, started under the `core` and
  # `full` profiles once `api` reports healthy. Published on 127.0.0.1:3080.
  admin:
    profiles:
      - core
      - full
    build:
      dockerfile: infra/docker/Dockerfile.admin
      context: ../..
    depends_on:
      api:
        condition: service_healthy
    env_file: ../../.env.local
    environment:
      NODE_ENV: production
      HOSTNAME: '0.0.0.0'
      PORT: '3080'
      # Browser-facing URLs are relative paths.
      NEXT_PUBLIC_API_URL: ''
      NEXT_PUBLIC_MLFLOW_URL: '/mlflow'
      # Reaches the API over the compose network by service name.
      INTERNAL_API_URL: 'http://api:3078'
    ports:
      - '127.0.0.1:3080:3080'

  # ── full profile ──────────────────────────────────────────────────────────

  # `ml-serving` service: built from Dockerfile.ml, started under the `full`
  # profile only. Published on 127.0.0.1:8000.
  ml-serving:
    profiles:
      - full
    build:
      dockerfile: infra/docker/Dockerfile.ml
      context: ../..
    env_file: ../../.env.local
    extra_hosts:
      # Makes `host.docker.internal` resolve to the Docker host gateway.
      - "host.docker.internal:host-gateway"
    environment:
      # Each value is interpolated at compose time; the first two fall back to
      # endpoints on the Docker host, the third falls back to empty.
      LITELLM_URL: "${LITELLM_URL:-http://host.docker.internal:4000}"
      OLLAMA_URL: "${OLLAMA_URL:-http://host.docker.internal:11434}"
      MLFLOW_TRACKING_URI: "${MLFLOW_TRACKING_URI:-}"
    ports:
      - "127.0.0.1:8000:8000"
    healthcheck:
      interval: 10s
      timeout: 5s
      retries: 5
      # Exits 0 only when GET /health answers with HTTP 200 within 3 seconds.
      test:
        - "CMD"
        - "python"
        - "-c"
        - "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:8000/health',timeout=3).status==200 else 1)"

  # ── mlops profile — MLflow ────────────────────────────────────────────────
  # Start: docker compose --profile mlops up
  # MLflow UI:  http://localhost:5000/mlflow  or  https://o.alogins.net/mlflow
  # (The `mlflow` service itself is defined further down, after `nats`.)

  # ── events profile — NATS JetStream ─────────────────────────────────────
  # Start: docker compose --profile events up
  # NATS monitoring: http://localhost:8222
  # Enable in the API by setting NATS_URL=nats://nats:4222 in .env.local

  # NATS server, started under the `events` and `full` profiles.
  nats:
    image: nats:2.10-alpine
    profiles:
      - events
      - full
    # JetStream enabled (-js), storage under /data (-sd), HTTP monitoring on
    # port 8222 (-m).
    command:
      - "-js"
      - "-sd"
      - "/data"
      - "-m"
      - "8222"
    volumes:
      - /mnt/ssd/dbs/oo/nats:/data
    ports:
      - "127.0.0.1:4222:4222"   # client connections
      - "127.0.0.1:8222:8222"   # HTTP monitoring
    healthcheck:
      interval: 10s
      timeout: 5s
      retries: 5
      # Quiet request against the monitoring endpoint's /healthz.
      test:
        - "CMD"
        - "wget"
        - "--spider"
        - "-q"
        - "http://localhost:8222/healthz"

  # MLflow server, started under the `mlops` profile only. Served under the
  # /mlflow static prefix and published on 127.0.0.1:5000.
  mlflow:
    image: ghcr.io/mlflow/mlflow:v3.11.1
    profiles:
      - mlops
    volumes:
      # Holds both the SQLite backend store and the artifact root.
      - /mnt/ssd/dbs/oo/mlflow:/mlflow
    ports:
      - "127.0.0.1:5000:5000"
    command: >
      mlflow server
      --backend-store-uri sqlite:////mlflow/mlflow.db
      --default-artifact-root /mlflow/artifacts
      --host 0.0.0.0
      --port 5000
      --static-prefix /mlflow
      --allowed-hosts o.alogins.net,localhost
      --cors-allowed-origins https://o.alogins.net
    healthcheck:
      interval: 10s
      timeout: 5s
      retries: 5
      # Health endpoint sits under the static prefix; exits 0 only on HTTP 200.
      test:
        - "CMD"
        - "python"
        - "-c"
        - "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:5000/mlflow/health',timeout=3).status==200 else 1)"