# Docker Compose stack for the "oo" project.
#
# Services are grouped by Compose profile and only start when their profile is
# selected, e.g. `docker compose --profile core up`:
#   core   — api, web, admin
#   full   — core + ml-serving + nats
#   mlops  — MLflow + Airflow (airflow-db, airflow-init, webserver, scheduler)
#   events — nats
#
# Every published port is bound to 127.0.0.1 only.
name: oo

services:
  # ── core profile ──────────────────────────────────────────────────────────
  api:
    build:
      context: ../..
      dockerfile: infra/docker/Dockerfile.api
    profiles: [core, full]
    env_file: ../../.env.local
    environment:
      NODE_ENV: production
    volumes:
      - /mnt/ssd/dbs/oo:/mnt/ssd/dbs/oo
    ports:
      - "127.0.0.1:3078:3078"
    # web and admin wait on this check (depends_on: service_healthy).
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:3078/health"]
      interval: 10s
      timeout: 5s
      retries: 5

  web:
    build:
      context: ../..
      dockerfile: infra/docker/Dockerfile.web
    profiles: [core, full]
    env_file: ../../.env.local
    environment:
      NODE_ENV: production
      PORT: "3079"
      HOSTNAME: "0.0.0.0"
      NEXT_PUBLIC_API_URL: ""  # Caddy routes /api/* directly to the API in prod
    ports:
      - "127.0.0.1:3079:3079"
    depends_on:
      api:
        condition: service_healthy

  admin:
    build:
      context: ../..
      dockerfile: infra/docker/Dockerfile.admin
    profiles: [core, full]
    env_file: ../../.env.local
    environment:
      NODE_ENV: production
      PORT: "3080"
      HOSTNAME: "0.0.0.0"
      NEXT_PUBLIC_API_URL: ""
      # Address of the api service on the Compose network.
      INTERNAL_API_URL: "http://api:3078"
    ports:
      - "127.0.0.1:3080:3080"
    depends_on:
      api:
        condition: service_healthy

  # ── full profile ──────────────────────────────────────────────────────────
  ml-serving:
    build:
      context: ../..
      dockerfile: infra/docker/Dockerfile.ml
    profiles: [full]
    env_file: ../../.env.local
    environment:
      LITELLM_URL: "${LITELLM_URL:-https://llm.alogins.net}"
      OLLAMA_URL: "${OLLAMA_URL:-http://host.docker.internal:11434}"
    # Makes host.docker.internal resolve to the Docker host (used by the
    # default OLLAMA_URL above).
    extra_hosts:
      - "host.docker.internal:host-gateway"
    ports:
      - "127.0.0.1:8000:8000"
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:8000/health',timeout=3).status==200 else 1)"]
      interval: 10s
      timeout: 5s
      retries: 5

  # ── mlops profile — MLflow + Airflow ──────────────────────────────────────
  # Start: docker compose --profile mlops up
  # MLflow UI: http://localhost:5000 or https://o.alogins.net/mlflow (admin / password — change via basic_auth.ini)
  # Airflow UI: http://localhost:8080/airflow or https://o.alogins.net/airflow (admin / AIRFLOW_ADMIN_PASSWORD)
  # Caddy routes /mlflow* and /airflow* inside the o.alogins.net block
  airflow-db:
    image: postgres:16-alpine
    profiles: [mlops]
    environment:
      POSTGRES_DB: airflow
      POSTGRES_USER: airflow
      # Falls back to "airflow" when AIRFLOW_DB_PASSWORD is unset; set it for
      # any non-local deployment.
      POSTGRES_PASSWORD: "${AIRFLOW_DB_PASSWORD:-airflow}"
    volumes:
      - /mnt/ssd/dbs/oo/airflow-db:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U airflow"]
      interval: 10s
      timeout: 5s
      retries: 5

  # One-shot job: migrates the Airflow metadata DB and creates the admin user.
  # The webserver and scheduler start only after it exits successfully.
  airflow-init:
    image: apache/airflow:2.9.3
    profiles: [mlops]
    entrypoint: /bin/bash
    # `set -e` aborts on a failed migration so the job cannot report success
    # off the exit status of a later command. `$$` is Compose's escape for a
    # literal `$`, so the password expansion is done by bash in the container.
    command:
      - "-c"
      - |
        set -e
        airflow db migrate
        airflow users create \
          --username admin \
          --firstname Admin \
          --lastname User \
          --role Admin \
          --email admin@oo.local \
          --password "$${AIRFLOW_ADMIN_PASSWORD:-admin}"
    environment:
      AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: "postgresql+psycopg2://airflow:${AIRFLOW_DB_PASSWORD:-airflow}@airflow-db/airflow"
      AIRFLOW__CORE__EXECUTOR: LocalExecutor
      AIRFLOW__WEBSERVER__SECRET_KEY: "${AIRFLOW_SECRET_KEY:-change-me-in-prod}"
      AIRFLOW__WEBSERVER__BASE_URL: "${AIRFLOW_BASE_URL:-https://o.alogins.net/airflow}"
      # Pass the password into the container: the script above expands it at
      # run time, and without this entry it would always fall back to "admin".
      AIRFLOW_ADMIN_PASSWORD: "${AIRFLOW_ADMIN_PASSWORD:-admin}"
    depends_on:
      airflow-db:
        condition: service_healthy
    restart: "no"

  airflow-webserver:
    image: apache/airflow:2.9.3
    profiles: [mlops]
    command: webserver
    environment:
      AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: "postgresql+psycopg2://airflow:${AIRFLOW_DB_PASSWORD:-airflow}@airflow-db/airflow"
      AIRFLOW__CORE__EXECUTOR: LocalExecutor
      AIRFLOW__WEBSERVER__SECRET_KEY: "${AIRFLOW_SECRET_KEY:-change-me-in-prod}"
      AIRFLOW__CORE__FERNET_KEY: "${AIRFLOW_FERNET_KEY:-}"
      AIRFLOW__WEBSERVER__BASE_URL: "${AIRFLOW_BASE_URL:-https://o.alogins.net/airflow}"
    volumes:
      - ../../ml/pipelines:/opt/airflow/dags:ro
    ports:
      - "127.0.0.1:8080:8080"
    depends_on:
      airflow-init:
        condition: service_completed_successfully
    # The webserver is mounted under the path component of BASE_URL
    # (/airflow by default), so the health endpoint lives under that prefix.
    # Keep this path in sync if AIRFLOW_BASE_URL is overridden.
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:8080/airflow/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s

  airflow-scheduler:
    image: apache/airflow:2.9.3
    profiles: [mlops]
    command: scheduler
    environment:
      AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: "postgresql+psycopg2://airflow:${AIRFLOW_DB_PASSWORD:-airflow}@airflow-db/airflow"
      AIRFLOW__CORE__EXECUTOR: LocalExecutor
      # Must be identical to the webserver's secret key: Airflow requires the
      # same key on all components, and the webserver uses it when fetching
      # task logs from the machine that ran the task.
      AIRFLOW__WEBSERVER__SECRET_KEY: "${AIRFLOW_SECRET_KEY:-change-me-in-prod}"
      AIRFLOW__CORE__FERNET_KEY: "${AIRFLOW_FERNET_KEY:-}"
    volumes:
      - ../../ml/pipelines:/opt/airflow/dags:ro
    depends_on:
      airflow-init:
        condition: service_completed_successfully

  # ── events profile — NATS JetStream ─────────────────────────────────────
  # Start: docker compose --profile events up
  # NATS monitoring: http://localhost:8222
  # Enable in the API by setting NATS_URL=nats://nats:4222 in .env.local
  nats:
    image: nats:2.10-alpine
    profiles: [events, full]
    # -js: enable JetStream, -sd: storage directory, -m: HTTP monitoring port
    command: ["-js", "-sd", "/data", "-m", "8222"]
    volumes:
      - /mnt/ssd/dbs/oo/nats:/data
    ports:
      - "127.0.0.1:4222:4222"  # client connections
      - "127.0.0.1:8222:8222"  # HTTP monitoring
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:8222/healthz"]
      interval: 10s
      timeout: 5s
      retries: 5

  # ── mlops profile (continued) — MLflow tracking server ──────────────────
  mlflow:
    image: ghcr.io/mlflow/mlflow:v2.14.3
    profiles: [mlops]
    # SQLite backend store and artifacts both live on the /mlflow bind mount.
    command: >
      mlflow server
      --backend-store-uri sqlite:////mlflow/mlflow.db
      --default-artifact-root /mlflow/artifacts
      --host 0.0.0.0
      --port 5000
      --app-name basic-auth
      --static-prefix /mlflow
    environment:
      MLFLOW_AUTH_CONFIG_PATH: /mlflow/basic_auth.ini
    volumes:
      - /mnt/ssd/dbs/oo/mlflow:/mlflow
      - ../../infra/mlflow/basic_auth.ini:/mlflow/basic_auth.ini:ro
    ports:
      - "127.0.0.1:5000:5000"
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:5000/health',timeout=3).status==200 else 1)"]
      interval: 10s
      timeout: 5s
      retries: 5