Infrastructure: - Add `mlops` compose profile: MLflow (basic-auth, /mlflow path) + Airflow (LocalExecutor, /airflow path) + airflow-db - infra/mlflow/basic_auth.ini for MLflow auth config - Caddy routes /mlflow* and /airflow* inside existing o.alogins.net block (see agap_git) - Dockerfile.admin: NEXT_PUBLIC_MLFLOW_URL / NEXT_PUBLIC_AIRFLOW_URL build args (default /mlflow, /airflow) Admin panel: - /admin/models: replace MLflow iframe with external link cards - /admin/experiments: replace LinUCB stats with MLOps hub (links to MLflow experiments/models + Airflow DAGs/datasets) - AdminShell: external nav links for MLflow ↗ and Airflow ↗ under MLOps section Docs & planning: - README: new AI stack section (Ollama/LiteLLM/OpenWebUI three-tier, tip generation pipeline, model aliases) - README: Phase 2 expanded with AI infra issues (#86-#93) and granular pipeline breakdown - README: Phase 4 expanded with LLM MLOps items (#94-#97) - CLAUDE.md: AI stack section, updated current phase (M1 shipped / M2 in progress), compose profiles, updated What NOT to do - docs/architecture/overview.md: AI stack section, updated decision flow diagram for Phase 2 LLM pipeline - ADR-0006: updated to reflect external services (path-based, not embedded) - Gitea issues #86-#97 created (M2: AI infra + pipeline; M4: LLM MLOps) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
181 lines
5.6 KiB
YAML
181 lines
5.6 KiB
YAML
# Compose project name.
name: oo

# Service definitions, grouped by compose profile (core, full, mlops).
services:
|
|
# ── core profile ──────────────────────────────────────────────────────────
|
|
|
|
# HTTP API service; started by the `core` and `full` profiles.
api:
  profiles:
    - core
    - full
  build:
    context: ../..
    dockerfile: infra/docker/Dockerfile.api
  env_file:
    - ../../.env.local
  environment:
    NODE_ENV: production
  # Bind mount that uses the same path on the host and in the container.
  volumes:
    - /mnt/ssd/dbs/oo:/mnt/ssd/dbs/oo
  # Published on the host loopback interface only.
  ports:
    - "127.0.0.1:3078:3078"
  # Reports healthy once GET /health answers on port 3078.
  healthcheck:
    test:
      - "CMD"
      - "wget"
      - "--spider"
      - "-q"
      - "http://localhost:3078/health"
    interval: 10s
    timeout: 5s
    retries: 5
|
|
|
|
# Web front end; started by the `core` and `full` profiles.
web:
  profiles:
    - core
    - full
  build:
    context: ../..
    dockerfile: infra/docker/Dockerfile.web
  # Start only after the api service reports healthy.
  depends_on:
    api:
      condition: service_healthy
  env_file:
    - ../../.env.local
  environment:
    NODE_ENV: production
    PORT: "3079"
    HOSTNAME: "0.0.0.0"
    # Left empty: Caddy routes /api/* directly to the API in prod.
    NEXT_PUBLIC_API_URL: ""
  # Published on the host loopback interface only.
  ports:
    - "127.0.0.1:3079:3079"
|
|
|
|
# Admin panel; started by the `core` and `full` profiles.
admin:
  profiles:
    - core
    - full
  build:
    context: ../..
    dockerfile: infra/docker/Dockerfile.admin
  # Start only after the api service reports healthy.
  depends_on:
    api:
      condition: service_healthy
  env_file:
    - ../../.env.local
  environment:
    NODE_ENV: production
    PORT: "3080"
    HOSTNAME: "0.0.0.0"
    NEXT_PUBLIC_API_URL: ""
    # Addresses the api service by its compose service name.
    INTERNAL_API_URL: "http://api:3078"
  # Published on the host loopback interface only.
  ports:
    - "127.0.0.1:3080:3080"
|
|
|
|
# ── full profile ──────────────────────────────────────────────────────────
|
|
|
|
# Model-serving container; started by the `full` profile only.
ml-serving:
  profiles:
    - full
  build:
    context: ../..
    dockerfile: infra/docker/Dockerfile.ml
  # Published on the host loopback interface only.
  ports:
    - "127.0.0.1:8000:8000"
  # Reports healthy once GET /health answers on port 8000.
  healthcheck:
    test:
      - "CMD"
      - "wget"
      - "--spider"
      - "-q"
      - "http://localhost:8000/health"
    interval: 10s
    timeout: 5s
    retries: 5
|
|
|
|
# ── mlops profile — MLflow + Airflow ──────────────────────────────────────
|
|
# Start: docker compose --profile mlops up
|
|
# MLflow UI: http://localhost:5000/mlflow or https://o.alogins.net/mlflow (admin / password — change via basic_auth.ini)
|
|
# Airflow UI: http://localhost:8080/airflow or https://o.alogins.net/airflow (admin / AIRFLOW_ADMIN_PASSWORD)
|
|
# Caddy routes /mlflow* and /airflow* inside the o.alogins.net block
|
|
|
|
# PostgreSQL instance for the Airflow services; `mlops` profile.
airflow-db:
  profiles:
    - mlops
  image: postgres:16-alpine
  environment:
    POSTGRES_USER: airflow
    # Falls back to "airflow" when AIRFLOW_DB_PASSWORD is unset or empty.
    POSTGRES_PASSWORD: "${AIRFLOW_DB_PASSWORD:-airflow}"
    POSTGRES_DB: airflow
  # Database files live on the host under /mnt/ssd/dbs/oo/airflow-db.
  volumes:
    - /mnt/ssd/dbs/oo/airflow-db:/var/lib/postgresql/data
  # airflow-init waits on this check before running.
  healthcheck:
    test:
      - "CMD-SHELL"
      - "pg_isready -U airflow"
    interval: 10s
    timeout: 5s
    retries: 5
|
|
|
|
# One-shot job for the `mlops` profile: migrates the Airflow metadata DB and
# creates the `admin` user. airflow-webserver and airflow-scheduler start only
# after it completes successfully.
airflow-init:
  image: apache/airflow:2.9.3
  profiles: [mlops]
  entrypoint: /bin/bash
  # `set -e` aborts the job if the migration fails instead of continuing to
  # user creation. `$$` is the compose escape for a literal `$`, so
  # ${AIRFLOW_ADMIN_PASSWORD:-admin} is expanded by bash inside the container,
  # not by compose.
  command:
    - "-c"
    - |
      set -e
      airflow db migrate
      airflow users create \
        --username admin \
        --firstname Admin \
        --lastname User \
        --role Admin \
        --email admin@oo.local \
        --password "$${AIRFLOW_ADMIN_PASSWORD:-admin}"
  environment:
    AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:${AIRFLOW_DB_PASSWORD:-airflow}@airflow-db/airflow
    AIRFLOW__CORE__EXECUTOR: LocalExecutor
    AIRFLOW__WEBSERVER__SECRET_KEY: ${AIRFLOW_SECRET_KEY:-change-me-in-prod}
    AIRFLOW__WEBSERVER__BASE_URL: ${AIRFLOW_BASE_URL:-https://o.alogins.net/airflow}
    # Forward the host value into the container. Without this the script above
    # never sees the variable and always falls back to "admin".
    AIRFLOW_ADMIN_PASSWORD: ${AIRFLOW_ADMIN_PASSWORD:-admin}
  depends_on:
    airflow-db:
      condition: service_healthy
  # Run once and exit; never restart.
  restart: "no"
|
|
|
|
# Airflow web UI for the `mlops` profile, served under the /airflow path.
airflow-webserver:
  image: apache/airflow:2.9.3
  profiles: [mlops]
  command: webserver
  environment:
    AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:${AIRFLOW_DB_PASSWORD:-airflow}@airflow-db/airflow
    AIRFLOW__CORE__EXECUTOR: LocalExecutor
    AIRFLOW__WEBSERVER__SECRET_KEY: ${AIRFLOW_SECRET_KEY:-change-me-in-prod}
    # Empty when AIRFLOW_FERNET_KEY is unset.
    AIRFLOW__CORE__FERNET_KEY: ${AIRFLOW_FERNET_KEY:-}
    # The path component (/airflow) becomes the URL prefix of every route.
    AIRFLOW__WEBSERVER__BASE_URL: ${AIRFLOW_BASE_URL:-https://o.alogins.net/airflow}
  # DAG definitions are mounted read-only from the repository.
  volumes:
    - ../../ml/pipelines:/opt/airflow/dags:ro
  # Published on the host loopback interface only.
  ports:
    - "127.0.0.1:8080:8080"
  depends_on:
    airflow-init:
      condition: service_completed_successfully
  # The health endpoint sits under the base-URL path prefix, so probe
  # /airflow/health rather than /health. Keep this in sync with the path in
  # AIRFLOW_BASE_URL if that variable is overridden.
  healthcheck:
    test: ["CMD", "curl", "--fail", "http://localhost:8080/airflow/health"]
    interval: 30s
    timeout: 10s
    retries: 5
    start_period: 60s
|
|
|
|
# Airflow scheduler for the `mlops` profile; with LocalExecutor it also runs
# the tasks themselves.
airflow-scheduler:
  image: apache/airflow:2.9.3
  profiles: [mlops]
  command: scheduler
  environment:
    AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:${AIRFLOW_DB_PASSWORD:-airflow}@airflow-db/airflow
    AIRFLOW__CORE__EXECUTOR: LocalExecutor
    # Must match the webserver's value: Airflow uses this key to authorise
    # task-log requests between components, so a mismatch breaks log retrieval
    # in the UI.
    AIRFLOW__WEBSERVER__SECRET_KEY: ${AIRFLOW_SECRET_KEY:-change-me-in-prod}
    # Empty when AIRFLOW_FERNET_KEY is unset.
    AIRFLOW__CORE__FERNET_KEY: ${AIRFLOW_FERNET_KEY:-}
  # DAG definitions are mounted read-only from the repository.
  volumes:
    - ../../ml/pipelines:/opt/airflow/dags:ro
  depends_on:
    airflow-init:
      condition: service_completed_successfully
|
|
|
|
# MLflow tracking server for the `mlops` profile, with the basic-auth app
# enabled and the UI served under the /mlflow prefix.
mlflow:
  image: ghcr.io/mlflow/mlflow:2.14.3
  profiles: [mlops]
  # Folded scalar: the lines below are joined into one command line.
  command: >
    mlflow server
    --backend-store-uri sqlite:////mlflow/mlflow.db
    --default-artifact-root /mlflow/artifacts
    --host 0.0.0.0
    --port 5000
    --app-name basic-auth
    --static-prefix /mlflow
  environment:
    MLFLOW_AUTH_CONFIG_PATH: /mlflow/basic_auth.ini
  # The data directory is a host bind mount; the auth config from the
  # repository is mounted read-only on top of it.
  volumes:
    - /mnt/ssd/dbs/oo/mlflow:/mlflow
    - ../../infra/mlflow/basic_auth.ini:/mlflow/basic_auth.ini:ro
  # Published on the host loopback interface only.
  ports:
    - "127.0.0.1:5000:5000"
  # Probe with the Python interpreter the MLflow image necessarily ships
  # instead of depending on curl being installed. urlopen raises (non-zero
  # exit) on connection errors and on HTTP error statuses.
  healthcheck:
    test:
      - "CMD"
      - "python"
      - "-c"
      - "import urllib.request; urllib.request.urlopen('http://localhost:5000/health', timeout=3)"
    interval: 10s
    timeout: 5s
    retries: 5
|