Created
March 16, 2026 02:54
-
-
Save knowlet/d01ef6666233069d082eaa326fe74d54 to your computer and use it in GitHub Desktop.
litellm + autoheal + traefik + cloudflare
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Docker Compose stack: LiteLLM proxy published through Traefik (TLS on :443),
# backed by Postgres, with Prometheus for metrics and autoheal for restarting
# unhealthy containers.
#
# NOTE(review): the nesting below was reconstructed from a flattened listing;
# confirm it against the deployed file (see `build.args.target` in particular).
#
# The database password is read from POSTGRES_PASSWORD (shell environment or
# the project's .env file) and falls back to the previous hard-coded value so
# existing deployments keep working. Override it for anything non-local.
services:
  # Edge proxy: terminates TLS and routes to containers that opt in via labels.
  traefik:
    image: traefik:v3.6.6
    command:
      # Read routing rules from Docker labels, only for opted-in containers.
      - --providers.docker=true
      - --providers.docker.exposedbydefault=false
      # Additional dynamic configuration from the file mounted below.
      - --providers.file.filename=/etc/traefik/dynamic.yml
      - --entrypoints.websecure.address=:443
      # Separate entrypoint for Prometheus metrics; it is not published under
      # `ports`, so it is reachable only from inside the Compose network.
      - --entrypoints.metrics.address=:9100
      - --metrics.prometheus=true
      - --metrics.prometheus.entrypoint=metrics
      - --metrics.prometheus.addEntryPointsLabels=true
      - --metrics.prometheus.addRoutersLabels=true
      - --metrics.prometheus.addServicesLabels=true
    ports:
      - "443:443"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ./traefik_dynamic.yml:/etc/traefik/dynamic.yml:ro
      # Presumably the certificates referenced from traefik_dynamic.yml.
      - ./certs:/certs:ro
    restart: unless-stopped

  # Restarts unhealthy containers; per the image's documentation it watches
  # containers labelled `autoheal=true` (litellm carries that label below).
  autoheal:
    deploy:
      replicas: 1
    # NOTE(review): `latest` is unpinned; consider pinning a released tag.
    image: willfarrell/autoheal:latest
    # No network is attached; the only channel it has is the Docker socket.
    network_mode: none
    restart: always
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - /var/run/docker.sock:/var/run/docker.sock

  # LiteLLM proxy, served at https://ai.lkc-lab.com through Traefik.
  litellm:
    restart: unless-stopped
    build:
      context: .
      # NOTE(review): as written this passes a build *argument* named
      # `target`. If a multi-stage build target was intended, the key belongs
      # directly under `build:` instead - confirm before changing.
      args:
        target: runtime
    image: ghcr.io/berriai/litellm:main-stable
    # Container-internal port only; external traffic arrives via Traefik.
    expose:
      - "4000"
    volumes:
      - ./config.yaml:/app/config.yaml
    command:
      - "--config=/app/config.yaml"
    environment:
      # Must use the same credentials as the `db` service. A password with
      # URL-reserved characters has to be percent-encoded here.
      DATABASE_URL: "postgresql://llmproxy:${POSTGRES_PASSWORD:-dbpassword9090}@db:5432/litellm"
      STORE_MODEL_IN_DB: "True"  # allows adding models to proxy via UI
    env_file:
      - .env  # Load local .env file
    depends_on:
      # Wait until Postgres passes its healthcheck, not merely until the
      # container has started.
      db:
        condition: service_healthy
    healthcheck:
      # Healthy only when the readiness endpoint reports both the proxy and
      # its database as connected. A failed assertion, bad response, or
      # timeout makes python exit non-zero, which marks the check as failed.
      # NOTE(review): the expected 'connected' values depend on the LiteLLM
      # version - confirm against the deployed image.
      # Simpler stdlib-only alternative (HTTP reachability only):
      #   python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/readiness')"
      test:
        - CMD-SHELL
        - >-
          python3 -c "import requests;
          r = requests.get('http://localhost:4000/health/readiness', timeout=5);
          data = r.json();
          assert data['status'] == 'connected' and data.get('db') == 'connected'"
          || exit 1
      interval: 30s  # Perform health check every 30 seconds
      timeout: 10s  # Health check command times out after 10 seconds
      retries: 3  # Retry up to 3 times if health check fails
      start_period: 40s  # Grace period after start before failures count
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.litellm.rule=Host(`ai.lkc-lab.com`)"
      - "traefik.http.routers.litellm.entrypoints=websecure"
      - "traefik.http.routers.litellm.tls=true"
      - "autoheal=true"

  # Postgres database used by LiteLLM (see DATABASE_URL above).
  db:
    image: postgres:16
    restart: always
    container_name: litellm_db
    environment:
      POSTGRES_DB: litellm
      POSTGRES_USER: llmproxy
      # Only applied when the data directory is first initialised; changing it
      # later does not alter the password stored in an existing volume.
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-dbpassword9090}"
    volumes:
      # Persists Postgres data across container restarts
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
      interval: 1s
      timeout: 5s
      retries: 10

  # Metrics store; scrape targets are defined in ./prometheus.yml.
  prometheus:
    # NOTE(review): untagged image resolves to `latest`; consider pinning.
    image: prom/prometheus
    volumes:
      - prometheus_data:/prometheus
      # Configuration is only read by Prometheus, so mount it read-only like
      # the other config files in this stack.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--storage.tsdb.retention.time=15d"
    restart: always

volumes:
  prometheus_data:
    driver: local
  postgres_data:
    name: litellm_postgres_data  # Named volume for Postgres data persistence
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment