-
Notifications
You must be signed in to change notification settings - Fork 76
Expand file tree
/
Copy pathkind-deploy.sh
More file actions
executable file
·155 lines (123 loc) · 5.17 KB
/
kind-deploy.sh
File metadata and controls
executable file
·155 lines (123 loc) · 5.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/bin/bash
# This shell script deploys a kind cluster with the UDS tokenizer and the simulator
set -eo pipefail
# ------------------------------------------------------------------------------
# Check variables
# ------------------------------------------------------------------------------
require_non_empty() {
local name=$1
if [ -z "${!name-}" ]; then
echo "Error: $name must not be empty" >&2
exit 1
fi
}
require_non_empty CLUSTER_NAME
require_non_empty HOST_PORT
require_non_empty MODEL_NAME
require_non_empty VLLM_SIMULATOR_IMAGE
require_non_empty UDS_TOKENIZER_IMAGE
export CLUSTER_NAME
export HOST_PORT
export MODEL_NAME
export VLLM_SIMULATOR_IMAGE
export UDS_TOKENIZER_IMAGE
export HF_TOKEN
# ------------------------------------------------------------------------------
# Setup & Requirement Checks
# ------------------------------------------------------------------------------
# Check for a supported container runtime if an explicit one was not set
if [ -z "${CONTAINER_RUNTIME}" ]; then
if command -v docker &> /dev/null; then
CONTAINER_RUNTIME="docker"
elif command -v podman &> /dev/null; then
CONTAINER_RUNTIME="podman"
else
echo "Neither docker nor podman could be found in PATH" >&2
exit 1
fi
fi
set -u
# Check for required programs
for cmd in kind kubectl ${CONTAINER_RUNTIME}; do
if ! command -v "$cmd" &> /dev/null; then
echo "Error: $cmd is not installed or not in the PATH."
exit 1
fi
done
# ------------------------------------------------------------------------------
# Cluster Deployment
# ------------------------------------------------------------------------------
# Check if the cluster already exists
if kind get clusters 2>/dev/null | grep -q "^${CLUSTER_NAME}$"; then
echo "Cluster '${CLUSTER_NAME}' already exists, re-using"
else
kind create cluster --name "${CLUSTER_NAME}" --config - << EOF
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
extraPortMappings:
- containerPort: ${HOST_PORT}
hostPort: ${HOST_PORT}
protocol: TCP
EOF
fi
# Set the kubectl context to the kind cluster
KUBE_CONTEXT="kind-${CLUSTER_NAME}"
kubectl config set-context ${KUBE_CONTEXT} --namespace=default
set -x
# Hotfix for https://github.com/kubernetes-sigs/kind/issues/3880
CONTAINER_NAME="${CLUSTER_NAME}-control-plane"
${CONTAINER_RUNTIME} exec -it ${CONTAINER_NAME} /bin/bash -c "sysctl net.ipv4.conf.all.arp_ignore=0"
# Wait for all pods to be ready
kubectl --context ${KUBE_CONTEXT} -n kube-system wait --for=condition=Ready --all pods --timeout=300s
echo "Waiting for local-path-storage pods to be created..."
until kubectl --context ${KUBE_CONTEXT} -n local-path-storage get pods -o name | grep -q pod/; do
sleep 2
done
kubectl --context ${KUBE_CONTEXT} -n local-path-storage wait --for=condition=Ready --all pods --timeout=300s
# ------------------------------------------------------------------------------
# Load Container Images
# ------------------------------------------------------------------------------
# Load the vllm simulator and uds tokenizer images into the cluster (only if it's a locally built image)
if [ -n "$(${CONTAINER_RUNTIME} images -q "${VLLM_SIMULATOR_IMAGE}")" ]; then
echo "INFO: Loading vllm-sim image into KIND cluster..."
if [ "${CONTAINER_RUNTIME}" == "podman" ]; then
podman save ${VLLM_SIMULATOR_IMAGE} -o /dev/stdout | kind --name ${CLUSTER_NAME} load image-archive /dev/stdin
else
kind --name ${CLUSTER_NAME} load docker-image ${VLLM_SIMULATOR_IMAGE}
fi
fi
if [ -n "$(${CONTAINER_RUNTIME} images -q "${UDS_TOKENIZER_IMAGE}")" ]; then
echo "INFO: Loading uds-tokenizer image into KIND cluster..."
if [ "${CONTAINER_RUNTIME}" == "podman" ]; then
podman save ${UDS_TOKENIZER_IMAGE} -o /dev/stdout | kind --name ${CLUSTER_NAME} load image-archive /dev/stdin
else
kind --name ${CLUSTER_NAME} load docker-image ${UDS_TOKENIZER_IMAGE}
fi
fi
# ------------------------------------------------------------------------------
# Development Environment
# ------------------------------------------------------------------------------
kubectl kustomize deploy \
| envsubst '${MODEL_NAME} ${VLLM_SIMULATOR_IMAGE} ${UDS_TOKENIZER_IMAGE} ${HOST_PORT} ${HF_TOKEN}' \
| kubectl --context ${KUBE_CONTEXT} apply -f -
# ------------------------------------------------------------------------------
# Check & Verify
# ------------------------------------------------------------------------------
# Wait for all deployments to be ready
kubectl --context ${KUBE_CONTEXT} -n default wait --for=condition=available --timeout=300s deployment --all
cat <<EOF
-----------------------------------------
Deployment completed!
* Kind Cluster Name: ${CLUSTER_NAME}
* Kubectl Context: ${KUBE_CONTEXT}
Status:
* The vllm simulator is running
* The UDS tokenizer is running
You can watch the Simulator logs with:
$ kubectl --context ${KUBE_CONTEXT} logs -f deployments/vllm-sim
With that running in the background, you can make requests:
$ curl -s -w '\n' http://localhost:${HOST_PORT}/v1/completions -H 'Content-Type: application/json' -d '{"model":"${MODEL_NAME}","prompt":"hi","max_tokens":10,"temperature":0}' | jq
-----------------------------------------
EOF