-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: docker-compose.yml
More file actions
145 lines (134 loc) · 3.7 KB
/
docker-compose.yml
File metadata and controls
145 lines (134 loc) · 3.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# docker-compose.yml — local RAG stack:
#   postgres (pgvector)  : vector store for retrieval
#   inference (vLLM)     : OpenAI-compatible LLM server on :8001
#   retrieval / gateway / orchestrator / pipelines : app services
#   otel-collector + jaeger + prometheus + grafana : observability
#
# NOTE: the `version` key is obsolete and ignored by Docker Compose v2+;
# kept only for compatibility with legacy `docker-compose` CLIs.
version: "3.9"

services:
  postgres:
    image: pgvector/pgvector:pg16
    container_name: llm_postgres
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: rag
    ports:
      - "5432:5432"
    volumes:
      - pgdata:/var/lib/postgresql/data
    # Healthcheck so dependents can wait for the DB to accept connections
    # instead of racing container startup.
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres -d rag"]
      interval: 5s
      timeout: 3s
      retries: 10

  inference:
    image: vllm/vllm-openai:latest
    container_name: llm_inference
    deploy:
      resources:
        reservations:
          devices:
            # `driver` and `count` are required for Compose to actually
            # attach a GPU; `capabilities: [gpu]` alone is not sufficient.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      HF_TOKEN: ${HF_TOKEN}
      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
      VLLM_MODEL: ${VLLM_MODEL:-meta-llama/Meta-Llama-3-8B-Instruct}
    # Arguments appended to the image's vLLM entrypoint. Folded scalar (>)
    # collapses the lines into a single argument string.
    command: >
      --model ${VLLM_MODEL:-meta-llama/Meta-Llama-3-8B-Instruct}
      --host 0.0.0.0
      --port 8001
      --served-model-name llm
      --gpu-memory-utilization 0.8
      --max-model-len 2048
    ports:
      - "8001:8001"
    # NOTE(review): assumes `curl` is present in the vllm/vllm-openai
    # image — confirm, or switch to a python-based check if it is not.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8001/v1/models"]
      interval: 20s
      timeout: 5s
      retries: 10

  retrieval:
    build: ./retrieval
    container_name: llm_retrieval
    environment:
      DATABASE_URL: postgresql+psycopg://postgres:postgres@postgres:5432/rag
      EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_SERVICE_NAME: retrieval
    ports:
      - "8002:8002"
    # Wait for the DB to be ready, not merely started.
    depends_on:
      postgres:
        condition: service_healthy
      otel-collector:
        condition: service_started

  gateway:
    build: ./gateway
    container_name: llm_gateway
    environment:
      INFERENCE_BASE_URL: http://inference:8001
      RETRIEVAL_BASE_URL: http://retrieval:8002
      LANGGRAPH_BASE_URL: http://orchestrator:8003
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_SERVICE_NAME: gateway
    ports:
      - "8000:8000"
    # Gate on the model server's healthcheck; other deps only need to exist.
    depends_on:
      inference:
        condition: service_healthy
      retrieval:
        condition: service_started
      orchestrator:
        condition: service_started
      otel-collector:
        condition: service_started

  orchestrator:
    build: ./orchestrator
    container_name: llm_orchestrator
    environment:
      RETRIEVAL_BASE_URL: http://retrieval:8002
      INFERENCE_BASE_URL: http://inference:8001
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_SERVICE_NAME: orchestrator
    ports:
      - "8003:8003"
    depends_on:
      retrieval:
        condition: service_started
      inference:
        condition: service_healthy
      otel-collector:
        condition: service_started

  pipelines:
    build: ./pipelines
    container_name: llm_pipelines
    environment:
      DATABASE_URL: postgresql+psycopg://postgres:postgres@postgres:5432/rag
      EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2
      HF_TOKEN: ${HF_TOKEN}
      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_SERVICE_NAME: pipelines
    ports:
      - "4000:4000"
    depends_on:
      postgres:
        condition: service_healthy
      otel-collector:
        condition: service_started

  otel-collector:
    image: otel/opentelemetry-collector:0.95.0
    container_name: llm_otel_collector
    command: ["--config=/etc/otel-collector-config.yaml"]
    volumes:
      - ./infra/otel/otel-collector-config.yaml:/etc/otel-collector-config.yaml
    ports:
      - "4317:4317"   # OTLP gRPC
      - "4318:4318"   # OTLP HTTP

  jaeger:
    image: jaegertracing/all-in-one:1.56
    container_name: llm_jaeger
    environment:
      COLLECTOR_OTLP_ENABLED: "true"
    ports:
      - "16686:16686"  # Jaeger UI
    depends_on:
      - otel-collector

  prometheus:
    image: prom/prometheus:v2.52.0
    container_name: llm_prometheus
    volumes:
      - ./infra/otel/prometheus.yml:/etc/prometheus/prometheus.yml
    ports:
      - "9090:9090"

  grafana:
    image: grafana/grafana:10.4.2
    container_name: llm_grafana
    ports:
      - "3000:3000"
    volumes:
      - ./infra/otel/grafana/dashboards:/var/lib/grafana/dashboards
      - ./infra/otel/grafana/provisioning:/etc/grafana/provisioning
    depends_on:
      - prometheus

volumes:
  # Named volume so Postgres data survives container recreation.
  pgdata: