-
Notifications
You must be signed in to change notification settings - Fork 911
128 lines (108 loc) · 4.84 KB
/
pre-merge-e2e.yml
File metadata and controls
128 lines (108 loc) · 4.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
name: e2e pre-merge checks

on:
  # Always run this workflow when commits are pushed to main.
  push:
    branches:
      - main
  # Run this workflow on pull requests, but only when the workflow itself,
  # the container build, or the cloud operator changes.
  pull_request:
    paths:
      - .github/workflows/pre-merge-e2e.yml
      - 'container/**'
      - 'deploy/cloud/operator/**'

jobs:
  # Creates an ephemeral Kubernetes namespace per framework, prepares it for
  # the e2e run, and always tears it down again in the "After Script" step.
  pre-merge-e2e:
    runs-on: cpu-amd-m5-2xlarge
    env:
      # github.run_id is shared by every job of a workflow run, so the matrix
      # value is appended to give each framework its own namespace. Without
      # it the three matrix jobs delete and recreate each other's namespace.
      NAMESPACE: gh-job-id-${{ github.run_id }}-${{ matrix.FRAMEWORK }}
    strategy:
      matrix:
        FRAMEWORK:
          - vllm
          - trtllm
          - sglang
    permissions:
      contents: read
    steps:
      - uses: actions/checkout@v4
      - name: Before Script
        # NOTE(review): CI_AKS_KUBECONFIG_B64, CI_JOB_TOKEN, CI_PROJECT_NAME,
        # CI_API_V4_URL, CI_PROJECT_ID, CI_REGISTRY_IMAGE,
        # DYNAMO_INGRESS_SUFFIX, HF_TOKEN and DYNAMO_REPO are not defined
        # anywhere in this workflow. Confirm the runner provides them, or map
        # them from secrets/vars with an `env:` block.
        run: |
          # Install dependencies
          apt-get update && apt-get install -y curl bash openssl gettext git jq
          # # TODO:
          # set -x
          # git clone -b $DYNAMO_REPO_BRANCH --depth=1 --single-branch https://dynamo-ai/dynamo dynamo
          # # use nvidia internal bitnami charts to avoid rate limiting
          # find dynamo -type f -exec sed -i 's|https://charts.bitnami.com/bitnami|oci://dockerhub.nvidia.com/bitnamicharts|g' {} +
          # echo $DYNAMO_REPO_CI_COMMIT_SHA | tee -a build.env
          # uv is already installed, but we need to sync the dependencies
          export PATH="/root/.local/bin:/opt/dynamo/venv/bin:$PATH"
          uv sync
          # Install yq (-f makes curl fail on HTTP errors instead of saving
          # the error page as the binary)
          echo "Installing yq..."
          curl -fsSL https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -o yq
          chmod +x yq
          mv yq /usr/local/bin/
          # Install Helm
          echo "Installing Helm..."
          curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
          chmod 700 get_helm.sh
          ./get_helm.sh
          # Install kubectl (latest stable release)
          curl -fsSLO "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          chmod +x kubectl
          mv kubectl /usr/local/bin/
          # Make sure the right context is used
          echo "${CI_AKS_KUBECONFIG_B64}" | base64 -d > .kubeconfig
          chmod 600 .kubeconfig
          export KUBECONFIG="$(pwd)/.kubeconfig"
          kubectl config set-context --current --namespace="$NAMESPACE" --kubeconfig "${KUBECONFIG}"
          # Create a namespace for this job; delete any leftover from a
          # previous attempt of the same run first
          echo "Creating an ephemeral namespace..."
          kubectl delete namespace "$NAMESPACE" || true
          kubectl create namespace "$NAMESPACE" || true
          echo "Attaching the labels for secrets and cleanup"
          kubectl label namespaces "${NAMESPACE}" nscleanup/enabled=true nscleanup/ttl=7200 gitlab-imagepull=enabled ngc-api=enabled nvcr-imagepull=enabled --overwrite=true
          # Set the namespace as default
          kubectl config set-context --current --namespace="$NAMESPACE"
          # Get helm repo for platform helm chart
          helm repo add --username gitlab-ci-token --password "${CI_JOB_TOKEN}" "${CI_PROJECT_NAME}" "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/dev"
          helm repo update
          # Check if Istio is installed
          kubectl get pods -n istio-system
          # Check if default storage class exists
          kubectl get storageclass
          # Install Helm chart
          # NOTE(review): nothing visible in this workflow creates build.env
          # (the only writer is in the commented-out TODO above); confirm it
          # exists in the checkout, otherwise this step fails here.
          cat build.env
          export IMAGE_TAG="$(cat build.env)"
          echo "$IMAGE_TAG"
          export VIRTUAL_ENV=/opt/dynamo/venv
          export KUBE_NS="$NAMESPACE"
          export ISTIO_ENABLED=true
          export ISTIO_GATEWAY=istio-system/ingress-alb
          export VIRTUAL_SERVICE_SUPPORTS_HTTPS=true
          export DYNAMO_CLOUD="https://${NAMESPACE}.${DYNAMO_INGRESS_SUFFIX}"
          export DOCKER_SERVER="$CI_REGISTRY_IMAGE"
          export DOCKER_USERNAME=gitlab-ci-token
          export DOCKER_PASSWORD="${CI_JOB_TOKEN}"
          # Install dynamo env secrets
          kubectl create secret generic hf-token-secret --from-literal=HF_TOKEN="$HF_TOKEN" -n "$KUBE_NS" || true
          cd ${DYNAMO_REPO}
      - name: After Script
        # Runs even when earlier steps failed so the namespace is not leaked.
        if: always()
        timeout-minutes: 10
        run: |
          echo "${CI_AKS_KUBECONFIG_B64}" | base64 -d > .kubeconfig
          chmod 600 .kubeconfig
          export KUBECONFIG="$(pwd)/.kubeconfig"
          kubectl config set-context --current --namespace="$NAMESPACE" --kubeconfig "${KUBECONFIG}"
          # For debugging purposes, list all the resources before we uninstall.
          # Best-effort: the step runs under `bash -e`, so a failed listing
          # must not abort the cleanup below.
          kubectl get all || true
          # CLEANUP_TIMEOUT is optional; default to no delay so an unset
          # variable does not make `sleep` fail and skip the cleanup.
          echo "Starting cleanup... after ${CLEANUP_TIMEOUT:-0} seconds"
          sleep "${CLEANUP_TIMEOUT:-0}"
          echo "Deleting all DynamoGraphDeployments in namespace $NAMESPACE..."
          kubectl delete dynamographdeployments --all -n "$NAMESPACE" || true
          # Uninstall the helm chart
          helm ls || true
          helm uninstall dynamo-platform || true
          echo "Namespace $NAMESPACE deletion initiated, proceeding with cleanup..."
          # NOTE(review): this step is capped by timeout-minutes: 10, so the
          # 7200s limit below can never be reached; confirm which is intended.
          timeout 7200s kubectl delete namespace "$NAMESPACE" || true
          echo "Namespace $NAMESPACE completed."