diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index 6d6bbd27d6..222b4353df 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -90,6 +90,7 @@ const Env = z.object({ KUBERNETES_MEMORY_REQUEST_MIN_GB: z.coerce.number().min(0).default(0), KUBERNETES_MEMORY_REQUEST_RATIO: z.coerce.number().min(0).max(1).default(1), // Ratio of memory limit, so 1 = 100% of memory limit KUBERNETES_MEMORY_OVERHEAD_GB: z.coerce.number().min(0).optional(), // Optional memory overhead to add to the limit in GB + KUBERNETES_TOPOLOGY_SPREAD_CONSTRAINTS: z.string().optional(), // JSON string // Placement tags settings PLACEMENT_TAGS_ENABLED: BoolEnv.default(false), diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts index 0f5e89c80a..7d46d452af 100644 --- a/apps/supervisor/src/workloadManager/kubernetes.ts +++ b/apps/supervisor/src/workloadManager/kubernetes.ts @@ -56,6 +56,22 @@ export class KubernetesWorkloadManager implements WorkloadManager { }; } + private parseTopologySpreadConstraints(): k8s.V1TopologySpreadConstraint[] | null { + if (!env.KUBERNETES_TOPOLOGY_SPREAD_CONSTRAINTS) { + return null; + } + + try { + return JSON.parse(env.KUBERNETES_TOPOLOGY_SPREAD_CONSTRAINTS); + } catch (error) { + this.logger.error("[KubernetesWorkloadManager] Failed to parse topology spread constraints", { + error: error instanceof Error ? 
error.message : String(error), + raw: env.KUBERNETES_TOPOLOGY_SPREAD_CONSTRAINTS, + }); + return null; + } + } + private stripImageDigest(imageRef: string): string { if (!env.KUBERNETES_STRIP_IMAGE_DIGEST) { return imageRef; @@ -270,6 +286,8 @@ export class KubernetesWorkloadManager implements WorkloadManager { } get #defaultPodSpec(): Omit<k8s.V1PodSpec, "containers"> { + const topologySpreadConstraints = this.parseTopologySpreadConstraints(); + return { restartPolicy: "Never", automountServiceAccountToken: false, @@ -281,6 +299,7 @@ }, } : {}), + ...(topologySpreadConstraints ? { topologySpreadConstraints } : {}), }; } diff --git a/hosting/k8s/helm/Chart.yaml b/hosting/k8s/helm/Chart.yaml index 42d962130f..f88378829b 100644 --- a/hosting/k8s/helm/Chart.yaml +++ b/hosting/k8s/helm/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: trigger description: The official Trigger.dev Helm chart type: application -version: 4.0.1 +version: 4.0.2 appVersion: v4.0.4 home: https://trigger.dev sources: diff --git a/hosting/k8s/helm/templates/supervisor.yaml b/hosting/k8s/helm/templates/supervisor.yaml index db85edf395..8eb68a3e5e 100644 --- a/hosting/k8s/helm/templates/supervisor.yaml +++ b/hosting/k8s/helm/templates/supervisor.yaml @@ -184,6 +184,10 @@ spec: value: {{ default "10Gi" .Values.supervisor.config.kubernetes.ephemeralStorageSizeLimit | quote }} - name: KUBERNETES_EPHEMERAL_STORAGE_SIZE_REQUEST value: {{ default "2Gi" .Values.supervisor.config.kubernetes.ephemeralStorageSizeRequest | quote }} + {{- with .Values.supervisor.config.kubernetes.topologySpreadConstraints }} + - name: KUBERNETES_TOPOLOGY_SPREAD_CONSTRAINTS + value: {{ tpl (toYaml .) $ | fromYamlArray | toJson | quote }} + {{- end }} # Pod cleaner configuration - name: POD_CLEANER_ENABLED value: {{ .Values.supervisor.config.podCleaner.enabled | quote }} @@ -272,6 +276,10 @@ spec: tolerations: {{- toYaml . 
| nindent 8 }} {{- end }} + {{- with .Values.supervisor.topologySpreadConstraints }} + topologySpreadConstraints: + {{- tpl (toYaml .) $ | nindent 8 }} + {{- end }} --- apiVersion: v1 kind: Service diff --git a/hosting/k8s/helm/templates/webapp.yaml b/hosting/k8s/helm/templates/webapp.yaml index f9f59c363f..77bb915af8 100644 --- a/hosting/k8s/helm/templates/webapp.yaml +++ b/hosting/k8s/helm/templates/webapp.yaml @@ -419,6 +419,10 @@ spec: tolerations: {{- toYaml . | nindent 8 }} {{- end }} + {{- with .Values.webapp.topologySpreadConstraints }} + topologySpreadConstraints: + {{- tpl (toYaml .) $ | nindent 8 }} + {{- end }} --- apiVersion: v1 kind: Service diff --git a/hosting/k8s/helm/values.yaml b/hosting/k8s/helm/values.yaml index 5f8fdaf046..d6faa03e40 100644 --- a/hosting/k8s/helm/values.yaml +++ b/hosting/k8s/helm/values.yaml @@ -69,6 +69,8 @@ webapp: nodeSelector: {} tolerations: [] affinity: {} + # Topology Spread Constraints for pod assignment spread across your cluster among failure-domains. Evaluated as a template + topologySpreadConstraints: [] logLevel: "info" gracefulShutdownTimeout: 1000 @@ -263,6 +265,8 @@ supervisor: workerNodetypeLabel: "" # When set, runs will only be scheduled on nodes with "nodetype=