From d7a242394011612349f883b169ac82d0cc9d1a44 Mon Sep 17 00:00:00 2001
From: alvis
Date: Sat, 18 Apr 2026 15:04:18 +0000
Subject: [PATCH] fix(infra): mlflow image tag + python-based healthchecks for ml-serving/mlflow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Corrects mlflow image tag (2.14.3 → v2.14.3); the former tag does not
  exist on ghcr.io/mlflow/mlflow and caused a manifest-unknown error on
  pull.
- Replaces wget/curl healthchecks with inline python urllib calls — the
  python:3.12-slim (ml-serving) and ghcr.io/mlflow/mlflow images ship
  neither wget nor curl, so both containers reported unhealthy despite
  /health returning 200.

Co-Authored-By: Claude Opus 4.7
---
 infra/docker/docker-compose.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/infra/docker/docker-compose.yml b/infra/docker/docker-compose.yml
index 1700f10..5eec49c 100644
--- a/infra/docker/docker-compose.yml
+++ b/infra/docker/docker-compose.yml
@@ -70,7 +70,7 @@ services:
     ports:
       - "127.0.0.1:8000:8000"
     healthcheck:
-      test: ["CMD", "wget", "--spider", "-q", "http://localhost:8000/health"]
+      test: ["CMD", "python", "-c", "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:8000/health',timeout=3).status==200 else 1)"]
       interval: 10s
       timeout: 5s
       retries: 5
@@ -219,7 +219,7 @@ services:
       retries: 5
 
   mlflow:
-    image: ghcr.io/mlflow/mlflow:2.14.3
+    image: ghcr.io/mlflow/mlflow:v2.14.3
     profiles: [mlops]
     command: >
       mlflow server
@@ -237,7 +237,7 @@ services:
     ports:
       - "127.0.0.1:5000:5000"
     healthcheck:
-      test: ["CMD", "curl", "--fail", "http://localhost:5000/health"]
+      test: ["CMD", "python", "-c", "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:5000/health',timeout=3).status==200 else 1)"]
       interval: 10s
       timeout: 5s
       retries: 5