{
  "client": {
    "drop_excess_requests": false
  },
  "providers": {
    "ollama": {
      "keys": [
        {
          "name": "ollama-gpu",
          "value": "dummy",
          "models": [
            "qwen2.5:0.5b",
            "qwen2.5:1.5b",
            "qwen3:4b",
            "gemma3:4b",
            "qwen3:8b"
          ],
          "weight": 1.0
        }
      ],
      "network_config": {
        "base_url": "http://host.docker.internal:11436",
        "default_request_timeout_in_seconds": 300,
        "max_retries": 2,
        "retry_backoff_initial_ms": 500,
        "retry_backoff_max_ms": 10000
      }
    },
    "ollama-cpu": {
      "keys": [
        {
          "name": "ollama-cpu-key",
          "value": "dummy",
          "models": [
            "gemma3:1b",
            "qwen2.5:1.5b",
            "qwen2.5:3b"
          ],
          "weight": 1.0
        }
      ],
      "network_config": {
        "base_url": "http://host.docker.internal:11435",
        "default_request_timeout_in_seconds": 120,
        "max_retries": 2,
        "retry_backoff_initial_ms": 500,
        "retry_backoff_max_ms": 10000
      },
      "custom_provider_config": {
        "base_provider_type": "openai",
        "allowed_requests": {
          "chat_completion": true,
          "chat_completion_stream": true
        }
      }
    }
  }
}