-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: docker-compose.yml
More file actions
145 lines (134 loc) · 3.7 KB
/
docker-compose.yml
File metadata and controls
145 lines (134 loc) · 3.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# docker-compose.yml — local RAG stack:
#   postgres (pgvector)  : vector store for retrieval
#   inference (vLLM)     : OpenAI-compatible LLM server on :8001
#   retrieval / gateway / orchestrator / pipelines : app services
#   otel-collector + jaeger + prometheus + grafana : observability
#
# NOTE: the `version` key is obsolete and ignored by Docker Compose v2+;
# kept only for compatibility with legacy `docker-compose` CLIs.
version: "3.9"

services:
  postgres:
    image: pgvector/pgvector:pg16
    container_name: llm_postgres
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: rag
    ports:
      - "5432:5432"
    volumes:
      - pgdata:/var/lib/postgresql/data
    # Healthcheck so dependents can wait for the DB to accept connections
    # instead of racing container startup.
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres -d rag"]
      interval: 5s
      timeout: 3s
      retries: 10

  inference:
    image: vllm/vllm-openai:latest
    container_name: llm_inference
    deploy:
      resources:
        reservations:
          devices:
            # `driver` and `count` are required for Compose to actually
            # attach a GPU; `capabilities: [gpu]` alone is not sufficient.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      HF_TOKEN: ${HF_TOKEN}
      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
      VLLM_MODEL: ${VLLM_MODEL:-meta-llama/Meta-Llama-3-8B-Instruct}
    # Arguments appended to the image's vLLM entrypoint. Folded scalar (>)
    # collapses the lines into a single argument string.
    command: >
      --model ${VLLM_MODEL:-meta-llama/Meta-Llama-3-8B-Instruct}
      --host 0.0.0.0
      --port 8001
      --served-model-name llm
      --gpu-memory-utilization 0.8
      --max-model-len 2048
    ports:
      - "8001:8001"
    # NOTE(review): assumes `curl` is present in the vllm/vllm-openai
    # image — confirm, or switch to a python-based check if it is not.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8001/v1/models"]
      interval: 20s
      timeout: 5s
      retries: 10

  retrieval:
    build: ./retrieval
    container_name: llm_retrieval
    environment:
      DATABASE_URL: postgresql+psycopg://postgres:postgres@postgres:5432/rag
      EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_SERVICE_NAME: retrieval
    ports:
      - "8002:8002"
    # Wait for the DB to be ready, not merely started.
    depends_on:
      postgres:
        condition: service_healthy
      otel-collector:
        condition: service_started

  gateway:
    build: ./gateway
    container_name: llm_gateway
    environment:
      INFERENCE_BASE_URL: http://inference:8001
      RETRIEVAL_BASE_URL: http://retrieval:8002
      LANGGRAPH_BASE_URL: http://orchestrator:8003
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_SERVICE_NAME: gateway
    ports:
      - "8000:8000"
    # Gate on the model server's healthcheck; other deps only need to exist.
    depends_on:
      inference:
        condition: service_healthy
      retrieval:
        condition: service_started
      orchestrator:
        condition: service_started
      otel-collector:
        condition: service_started

  orchestrator:
    build: ./orchestrator
    container_name: llm_orchestrator
    environment:
      RETRIEVAL_BASE_URL: http://retrieval:8002
      INFERENCE_BASE_URL: http://inference:8001
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_SERVICE_NAME: orchestrator
    ports:
      - "8003:8003"
    depends_on:
      retrieval:
        condition: service_started
      inference:
        condition: service_healthy
      otel-collector:
        condition: service_started

  pipelines:
    build: ./pipelines
    container_name: llm_pipelines
    environment:
      DATABASE_URL: postgresql+psycopg://postgres:postgres@postgres:5432/rag
      EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2
      HF_TOKEN: ${HF_TOKEN}
      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_SERVICE_NAME: pipelines
    ports:
      - "4000:4000"
    depends_on:
      postgres:
        condition: service_healthy
      otel-collector:
        condition: service_started

  otel-collector:
    image: otel/opentelemetry-collector:0.95.0
    container_name: llm_otel_collector
    command: ["--config=/etc/otel-collector-config.yaml"]
    volumes:
      - ./infra/otel/otel-collector-config.yaml:/etc/otel-collector-config.yaml
    ports:
      - "4317:4317"   # OTLP gRPC
      - "4318:4318"   # OTLP HTTP

  jaeger:
    image: jaegertracing/all-in-one:1.56
    container_name: llm_jaeger
    environment:
      COLLECTOR_OTLP_ENABLED: "true"
    ports:
      - "16686:16686"  # Jaeger UI
    depends_on:
      - otel-collector

  prometheus:
    image: prom/prometheus:v2.52.0
    container_name: llm_prometheus
    volumes:
      - ./infra/otel/prometheus.yml:/etc/prometheus/prometheus.yml
    ports:
      - "9090:9090"

  grafana:
    image: grafana/grafana:10.4.2
    container_name: llm_grafana
    ports:
      - "3000:3000"
    volumes:
      - ./infra/otel/grafana/dashboards:/var/lib/grafana/dashboards
      - ./infra/otel/grafana/provisioning:/etc/grafana/provisioning
    depends_on:
      - prometheus

volumes:
  # Named volume so Postgres data survives container recreation.
  pgdata: