forked from infiniflow/ragflow
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathentrypoint.sh
More file actions
executable file
·342 lines (309 loc) · 10.6 KB
/
entrypoint.sh
File metadata and controls
executable file
·342 lines (309 loc) · 10.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
#!/usr/bin/env bash
# Entrypoint script that starts the RAGFlow components selected on the
# command line.

# Abort on the first unhandled command failure.
set -o errexit

# Startup banner, followed by the contents of the version file.
printf '%s\n' "Start RAGFlow cluster, version: "
cat /ragflow/VERSION
# -----------------------------------------------------------------------------
# Usage and command-line argument parsing
# -----------------------------------------------------------------------------
# Print the command-line help for this script and terminate.
#
# The text lists every option the argument parser in this file accepts,
# including the MCP server options, which were previously undocumented.
#
# Globals:   none
# Arguments: none
# Outputs:   help text on stdout
# Exits:     always terminates the shell with status 1; it never returns
function usage() {
  cat <<EOF
Usage: $0 [options]

Component switches:
  --disable-webserver        Disables the web server (nginx + ragflow_server).
  --disable-taskexecutor     Disables task executor workers.
  --disable-datasync         Disables synchronization of datasource workers.
  --enable-mcpserver         Enables the MCP server.
  --enable-adminserver       Enables the Admin server.
  --init-superuser           Initializes the superuser.

Task executor options:
  --consumer-no-beg=<num>    Start range for consumers (if using range-based).
  --consumer-no-end=<num>    End range for consumers (if using range-based).
  --workers=<num>            Number of task executors to run (if range is not used).
  --host-id=<string>         Unique ID for the host (defaults to the hostname, or to
                             its MD5 hash when the hostname is longer than 32 characters).

MCP server options (only used with --enable-mcpserver):
  --mcp-host=<host>          Value passed to the MCP server as --host (default: 127.0.0.1).
  --mcp-port=<port>          Value passed to the MCP server as --port (default: 9382).
  --mcp-base-url=<url>       Value passed to the MCP server as --base-url
                             (default: http://127.0.0.1:9380).
  --mcp-mode=<mode>          Value passed to the MCP server as --mode (default: self-host).
  --mcp-host-api-key=<key>   Value passed to the MCP server as --api-key (default: empty).
  --mcp-script-path=<path>   MCP server script to run
                             (default: /ragflow/mcp/server/server.py).
  --no-transport-sse-enabled
                             Start the MCP server with --no-transport-sse-enabled
                             (default: --transport-sse-enabled).
  --no-transport-streamable-http-enabled
                             Start the MCP server with --no-transport-streamable-http-enabled
                             (default: --transport-streamable-http-enabled).
  --no-json-response         Start the MCP server with --no-json-response
                             (default: --json-response).

Examples:
  $0 --disable-taskexecutor
  $0 --disable-webserver --consumer-no-beg=0 --consumer-no-end=5
  $0 --disable-webserver --workers=2 --host-id=myhost123
  $0 --enable-mcpserver
  $0 --enable-adminserver
  $0 --init-superuser
EOF
  exit 1
}
# --- Component switches: 1 starts the component, 0 skips it -----------------
ENABLE_WEBSERVER="1"      # on by default; cleared by --disable-webserver
ENABLE_TASKEXECUTOR="1"   # on by default; cleared by --disable-taskexecutor
ENABLE_DATASYNC="1"       # on by default; cleared by --disable-datasync
ENABLE_MCP_SERVER="0"     # off by default; set by --enable-mcpserver
ENABLE_ADMIN_SERVER="0"   # off by default; set by --enable-adminserver

# Extra argument handed to the RAGFlow server; empty means "do not
# initialize the superuser".
INIT_SUPERUSER_ARGS=''

# --- Task executor sizing ---------------------------------------------------
# When CONSUMER_NO_END is greater than CONSUMER_NO_BEG the half-open ID range
# [BEG, END) is used; otherwise WORKERS executors are started.
CONSUMER_NO_BEG="0"
CONSUMER_NO_END="0"
WORKERS="1"

# --- MCP server settings (each can be overridden on the command line) -------
MCP_SCRIPT_PATH='/ragflow/mcp/server/server.py'
MCP_HOST='127.0.0.1'
MCP_PORT="9382"
MCP_BASE_URL='http://127.0.0.1:9380'
MCP_MODE='self-host'
MCP_HOST_API_KEY=''
# Flags forwarded verbatim to the MCP server; the --no-* command-line options
# replace them with their negated form.
MCP_TRANSPORT_SSE_FLAG='--transport-sse-enabled'
MCP_TRANSPORT_STREAMABLE_HTTP_FLAG='--transport-streamable-http-enabled'
MCP_JSON_RESPONSE_FLAG='--json-response'
# -----------------------------------------------------------------------------
# Host ID logic:
# 1. By default, use the system hostname if length <= 32
# 2. Otherwise, use the full MD5 hash of the hostname (32 hex chars)
# -----------------------------------------------------------------------------
CURRENT_HOSTNAME="$(hostname)"

# Start from the hostname itself and swap in its MD5 hex digest only when the
# name is longer than 32 characters.
DEFAULT_HOST_ID="${CURRENT_HOSTNAME}"
if [[ "${#CURRENT_HOSTNAME}" -gt 32 ]]; then
  DEFAULT_HOST_ID="$(printf '%s' "${CURRENT_HOSTNAME}" | md5sum | cut -d ' ' -f 1)"
fi

# Effective host ID; --host-id=<string> replaces it during argument parsing.
HOST_ID="${DEFAULT_HOST_ID}"
# Parse arguments
# Consume the positional parameters one at a time. Switches flip the matching
# ENABLE_* variable, --name=value options store everything after the first
# '=', and anything unrecognized prints the help text (which exits).
while [[ $# -gt 0 ]]; do
  arg="$1"
  shift
  case "${arg}" in
    # Component switches
    --disable-webserver)    ENABLE_WEBSERVER=0 ;;
    --disable-taskexecutor) ENABLE_TASKEXECUTOR=0 ;;
    --disable-datasync)     ENABLE_DATASYNC=0 ;;
    --enable-mcpserver)     ENABLE_MCP_SERVER=1 ;;
    --enable-adminserver)   ENABLE_ADMIN_SERVER=1 ;;
    --init-superuser)       INIT_SUPERUSER_ARGS="--init-superuser" ;;

    # MCP server settings
    --mcp-host=*)           MCP_HOST="${arg#*=}" ;;
    --mcp-port=*)           MCP_PORT="${arg#*=}" ;;
    --mcp-base-url=*)       MCP_BASE_URL="${arg#*=}" ;;
    --mcp-mode=*)           MCP_MODE="${arg#*=}" ;;
    --mcp-host-api-key=*)   MCP_HOST_API_KEY="${arg#*=}" ;;
    --mcp-script-path=*)    MCP_SCRIPT_PATH="${arg#*=}" ;;
    --no-transport-sse-enabled)
      MCP_TRANSPORT_SSE_FLAG="--no-transport-sse-enabled"
      ;;
    --no-transport-streamable-http-enabled)
      MCP_TRANSPORT_STREAMABLE_HTTP_FLAG="--no-transport-streamable-http-enabled"
      ;;
    --no-json-response)
      MCP_JSON_RESPONSE_FLAG="--no-json-response"
      ;;

    # Task executor sizing and identity
    --consumer-no-beg=*)    CONSUMER_NO_BEG="${arg#*=}" ;;
    --consumer-no-end=*)    CONSUMER_NO_END="${arg#*=}" ;;
    --workers=*)            WORKERS="${arg#*=}" ;;
    --host-id=*)            HOST_ID="${arg#*=}" ;;

    # Unknown option: show help and exit
    *)                      usage ;;
  esac
done
# -----------------------------------------------------------------------------
# Replace env variables in the service_conf.yaml file
# -----------------------------------------------------------------------------
CONF_DIR="/ragflow/conf"
TEMPLATE_FILE="${CONF_DIR}/service_conf.yaml.template"
CONF_FILE="${CONF_DIR}/service_conf.yaml"

# Render a configuration template by letting the shell expand each line.
#
# Every template line is evaluated as the body of a double-quoted string, so
# "${VAR}" and "${VAR:-default}" references are resolved against the current
# environment by the shell itself. The earlier special case guarded by
# `[[ "$line" =~ DEF_ENV_VALUE_PATTERN ]]` compared against the literal word
# (the `$` was missing) and therefore never ran; it has been removed, which
# leaves the generated file unchanged.
#
# NOTE(review): `eval` means the template is executed as shell text: double
# quotes, backticks and $(...) inside a line are interpreted. Only use this
# with a trusted template file.
#
# Arguments:
#   $1 - path of the template to read (a missing final newline is tolerated)
#   $2 - path of the file to append the rendered lines to
# Returns: non-zero if either file cannot be opened
function render_service_conf() {
  local line
  while IFS= read -r line || [[ -n "$line" ]]; do
    eval "echo \"$line\""
  done < "$1" >> "$2"
}

# Always regenerate the configuration from scratch.
rm -f "${CONF_FILE}"
render_service_conf "${TEMPLATE_FILE}" "${CONF_FILE}"
# Shared-library search path exported to every child process.
LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu/"
export LD_LIBRARY_PATH

# Interpreter used for all Python entry points launched by this script.
PY="python3"
# -----------------------------------------------------------------------------
# Select Nginx Configuration based on API_PROXY_SCHEME
# -----------------------------------------------------------------------------
NGINX_CONF_DIR="/etc/nginx/conf.d"

# Install one of the ragflow.conf.<variant> files as the active ragflow.conf.
#
# Scheme to variant mapping:
#   "hybrid"      -> ragflow.conf.hybrid
#   "go"          -> ragflow.conf.golang
#   anything else -> ragflow.conf.python (also used when the scheme is empty)
#
# The log line for the "go" scheme used to end in "(default)" even though the
# python variant is the one used when no scheme is given; that suffix is gone.
#
# Arguments:
#   $1 - proxy scheme (may be empty)
#   $2 - directory holding the ragflow.conf.* variants and the active file
# Outputs: one line on stdout naming the variant that was applied
function apply_nginx_conf() {
  local scheme="$1"
  local conf_dir="$2"
  local variant

  case "${scheme}" in
    hybrid) variant="hybrid" ;;
    go)     variant="golang" ;;
    *)      variant="python" ;;
  esac

  cp -f "${conf_dir}/ragflow.conf.${variant}" "${conf_dir}/ragflow.conf"

  if [[ -z "${scheme}" ]]; then
    echo "Default: applied nginx config: ragflow.conf.${variant}"
  else
    echo "Applied nginx config: ragflow.conf.${variant}"
  fi
}

apply_nginx_conf "${API_PROXY_SCHEME:-}" "${NGINX_CONF_DIR}"
# -----------------------------------------------------------------------------
# Function(s)
# -----------------------------------------------------------------------------
# Supervise one task executor: run it, and start it again one second after it
# exits, forever. This function never returns.
#
# Globals:
#   PY (read) - Python interpreter used to launch the executor
# Arguments:
#   $1 - consumer number
#   $2 - host ID; the executor is invoked with "<host_id>_<consumer_id>"
function task_exe() {
  local consumer_id="$1"
  local host_id="$2"
  # Kept local so the path does not leak into the caller's scope. Declared
  # separately from the assignment so a pkg-config failure is not masked by
  # the exit status of `local`.
  local jemalloc_path
  jemalloc_path="$(pkg-config --variable=libdir jemalloc)/libjemalloc.so"

  while true; do
    # LD_PRELOAD is set for this one command only, so jemalloc is preloaded
    # into the executor process and nothing else.
    LD_PRELOAD="${jemalloc_path}" \
      "$PY" rag/svr/task_executor.py "${host_id}_${consumer_id}" &
    # A bare `wait` returns 0 whatever the child's exit status was, so a
    # crashed executor does not trip `set -e` and end this loop.
    wait
    sleep 1
  done
}
# Launch the MCP server script in the background with the settings held in
# the MCP_* variables.
#
# Globals (read): PY, MCP_SCRIPT_PATH, MCP_HOST, MCP_PORT, MCP_BASE_URL,
#                 MCP_MODE, MCP_HOST_API_KEY, MCP_TRANSPORT_SSE_FLAG,
#                 MCP_TRANSPORT_STREAMABLE_HTTP_FLAG, MCP_JSON_RESPONSE_FLAG
# Outputs: one announcement line on stdout
function start_mcp_server() {
  # Assemble the full command line as an array so that every value stays a
  # single argument, whatever characters it contains.
  local -a mcp_cmd=(
    "$PY" "${MCP_SCRIPT_PATH}"
    --host="${MCP_HOST}"
    --port="${MCP_PORT}"
    --base-url="${MCP_BASE_URL}"
    --mode="${MCP_MODE}"
    --api-key="${MCP_HOST_API_KEY}"
    "${MCP_TRANSPORT_SSE_FLAG}"
    "${MCP_TRANSPORT_STREAMABLE_HTTP_FLAG}"
    "${MCP_JSON_RESPONSE_FLAG}"
  )

  echo "Starting MCP Server on ${MCP_HOST}:${MCP_PORT} with base URL ${MCP_BASE_URL}..."
  "${mcp_cmd[@]}" &
}
# Make sure the `docling` Python package is importable when USE_DOCLING is
# exactly "true"; otherwise report that it is disabled and do nothing.
#
# Globals:
#   USE_DOCLING     (read)    - feature toggle
#   DOCLING_VERSION (read)    - optional version specifier appended to "docling"
#   DOCLING_PIN     (written) - specifier actually used (default "==2.71.0")
#   PY              (read)    - Python interpreter used for the import probe
# Returns: 0 when disabled or already installed, else the status of the install
function ensure_docling() {
  if [[ "${USE_DOCLING}" != "true" ]]; then
    echo "[docling] disabled by USE_DOCLING"
    return 0
  fi

  DOCLING_PIN="${DOCLING_VERSION:-==2.71.0}"

  # The probe exits 0 only when Python can locate the package; install it
  # with uv otherwise.
  if ! "$PY" -c "import importlib.util,sys; sys.exit(0 if importlib.util.find_spec('docling') else 1)"; then
    uv pip install \
      -i https://pypi.tuna.tsinghua.edu.cn/simple \
      --extra-index-url https://pypi.org/simple \
      --no-cache-dir \
      "docling${DOCLING_PIN}"
  fi
}
# Run the database table initialization through the Python interpreter,
# logging a line before and after.
#
# Globals: PY (read) - Python interpreter
# Outputs: two progress lines on stdout (plus whatever the Python call prints)
function ensure_db_init() {
  local init_snippet="from api.db.db_models import init_database_tables as init_web_db; init_web_db()"

  printf '%s\n' "Initializing database tables..."
  "$PY" -c "${init_snippet}"
  printf '%s\n' "Database tables initialized."
}
# Poll a URL with curl until it answers successfully or the time budget runs
# out.
#
# Arguments:
#   $1 - URL to probe
#   $2 - human-readable server name used in the log lines
# Outputs: progress lines on stdout
# Returns: 0 once the URL responds; 1 when more than 90 seconds have elapsed
function wait_for_server() {
  local endpoint="$1"
  local label="$2"
  local -r max_wait=90
  local -r pause=2
  local began="$(date +%s)"
  local waited

  echo "Waiting for $label to be ready at $endpoint..."
  # curl -f turns HTTP error responses into a non-zero exit status.
  until curl -f -s -o /dev/null "$endpoint"; do
    waited=$(( $(date +%s) - began ))
    if (( waited > max_wait )); then
      echo "Timeout waiting for $label after $max_wait seconds"
      return 1
    fi
    sleep "$pause"
  done
  echo "$label is ready."
}
# -----------------------------------------------------------------------------
# Start components based on flags
# -----------------------------------------------------------------------------
# Pre-flight steps shared by every component.
ensure_docling
ensure_db_init

if [[ "${ENABLE_WEBSERVER}" -eq 1 ]]; then
  echo "Starting nginx..."
  /usr/sbin/nginx

  # Supervisor loop for the Python API server, run in a background subshell.
  while true; do
    echo "Attempt to start RAGFlow server..."
    # The subshell inherits `set -e` from the top of this script, so an
    # unguarded non-zero exit of the server would terminate the loop and the
    # server would never be restarted. Capturing the status with `||` keeps
    # the loop alive.
    # INIT_SUPERUSER_ARGS is intentionally unquoted: when empty it must
    # expand to no argument at all.
    rc=0
    "$PY" api/ragflow_server.py ${INIT_SUPERUSER_ARGS} || rc=$?
    echo "RAGFlow python server exited with status ${rc}; restarting in 1 second."
    sleep 1
  done &

  if [[ "${API_PROXY_SCHEME}" == "hybrid" ]]; then
    # Supervisor loop for the go server. It is only started once the Python
    # server answers its health check.
    while true; do
      echo "Attempt to start RAGFlow go server..."
      # Testing the result in an `if` keeps a health-check timeout from
      # tripping `set -e`; the loop simply tries again.
      if wait_for_server "http://127.0.0.1:9380/healthz" "ragflow_server"; then
        echo "Starting RAGFlow go server..."
        rc=0
        bin/server_main || rc=$?
        echo "RAGFlow go server exited with status ${rc}; restarting in 1 second."
      fi
      sleep 1
    done &
  fi
fi
if [[ "${ENABLE_ADMIN_SERVER}" -eq 1 ]]; then
  # Supervisor loop for the Python admin server, run in a background subshell.
  while true; do
    echo "Attempt to start Admin python server..."
    # The subshell inherits `set -e` from the top of this script, so an
    # unguarded non-zero exit of the server would terminate the loop and the
    # server would never be restarted. Capturing the status with `||` keeps
    # the loop alive.
    rc=0
    "$PY" admin/server/admin_server.py || rc=$?
    echo "Admin python server exited with status ${rc}; restarting in 1 second."
    sleep 1
  done &

  if [[ "${API_PROXY_SCHEME}" == "hybrid" ]]; then
    # Supervisor loop for the go admin server. It is only started once the
    # Python admin server answers its ping endpoint.
    while true; do
      echo "Attempt to start Admin go server..."
      # Testing the result in an `if` keeps a ping timeout from tripping
      # `set -e`; the loop simply tries again.
      if wait_for_server "http://127.0.0.1:9381/api/v1/admin/ping" "admin_server"; then
        echo "Starting Admin go server..."
        rc=0
        bin/admin_server || rc=$?
        echo "Admin go server exited with status ${rc}; restarting in 1 second."
      fi
      sleep 1
    done &
  fi
fi
if (( ENABLE_DATASYNC == 1 )); then
  echo "Starting data sync..."
  # Background supervisor: run the sync worker, wait for it to exit, pause
  # one second, then start it again.
  while :; do
    "$PY" rag/svr/sync_data_source.py &
    wait
    sleep 1
  done &
fi

if (( ENABLE_MCP_SERVER == 1 )); then
  start_mcp_server
fi
if (( ENABLE_TASKEXECUTOR == 1 )); then
  if (( CONSUMER_NO_END > CONSUMER_NO_BEG )); then
    # Range mode: one executor per consumer ID in the half-open range
    # [CONSUMER_NO_BEG, CONSUMER_NO_END).
    echo "Starting task executors on host '${HOST_ID}' for IDs in [${CONSUMER_NO_BEG}, ${CONSUMER_NO_END})..."
    i=$(( CONSUMER_NO_BEG ))
    while (( i < CONSUMER_NO_END )); do
      task_exe "${i}" "${HOST_ID}" &
      i=$(( i + 1 ))
    done
  else
    # Count mode: executors numbered 0 .. WORKERS-1.
    echo "Starting ${WORKERS} task executor(s) on host '${HOST_ID}'..."
    i=0
    while (( i < WORKERS )); do
      task_exe "${i}" "${HOST_ID}" &
      i=$(( i + 1 ))
    done
  fi
fi

# Block until every background job started above has exited.
wait