Skip to content

Commit d672fa4

Browse files
accorvintmckayus
authored and committed
Allow user to specify Node Tolerations for cluster pods
This adds support to the operator for a user to be able to specify a set of Node Tolerations to be applied to the pods in a cluster.
1 parent 41d7d77 commit d672fa4

7 files changed

Lines changed: 102 additions & 1 deletion

File tree

.travis/.travis.test-common.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,20 @@ testOverwriteLimits() {
330330
os::cmd::expect_success_and_text '${BIN} delete ${KIND} my-spark-cluster-overwritelim' '"my-spark-cluster-overwritelim" deleted'
331331
}
332332

333+
# Verifies that nodeTolerations declared in a SparkCluster definition are
# propagated to spec.tolerations of the cluster's worker pods, then deletes
# the test cluster. Each expect/try step is chained with && so the first
# failure aborts the test.
testNodeTolerations() {
334+
info
335+
sleep 2
336+
# Create the example cluster that declares a single toleration
# (key/operator/value/effect/tolerationSeconds). ${CM} switches between
# the CRD and ConfigMap flavors of the example manifest.
os::cmd::expect_success_and_text "${BIN} create -f $DIR/../examples/test/${CM}cluster-node-tolerations.yaml" '"?spark-cluster-with-tolerations"? created' && \
337+
# Wait until a worker pod exists and exposes spec.tolerations at all.
os::cmd::try_until_success "${BIN} get pod -l radanalytics.io/deployment=spark-cluster-with-tolerations-w -o=jsonpath='{.items[0].spec.tolerations}'" && \
338+
# Capture the rendered tolerations; strip double quotes so the field
# checks below can match bare key:value text.
local tolerations=`${BIN} get pod -l radanalytics.io/deployment=spark-cluster-with-tolerations-w -o='jsonpath="{.items[0].spec.tolerations}"' | sed 's/"//g'` && \
339+
# Assert every field from the example toleration made it onto the pod.
os::cmd::expect_success_and_text 'echo $tolerations' 'tolerationSeconds:60' && \
340+
os::cmd::expect_success_and_text 'echo $tolerations' 'value:foo_value' && \
341+
os::cmd::expect_success_and_text 'echo $tolerations' 'effect:NoExecute' && \
342+
os::cmd::expect_success_and_text 'echo $tolerations' 'key:foo_key' && \
343+
os::cmd::expect_success_and_text 'echo $tolerations' 'operator:Equal' && \
344+
# Clean up: single quotes are deliberate — os::cmd evaluates the string
# later, expanding ${BIN}/${KIND} at run time (same pattern as the other
# tests in this file).
os::cmd::expect_success_and_text '${BIN} delete ${KIND} spark-cluster-with-tolerations' '"spark-cluster-with-tolerations" deleted'
345+
}
346+
333347
testApp() {
334348
info
335349
[ "$CRD" = "0" ] && FOO="test/cm/" || FOO=""

.travis/.travis.test-oc-and-k8s.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ run_tests() {
5050
sleep 5
5151

5252
run_limit_request_tests || errorLogs
53+
sleep 5
54+
55+
testNodeTolerations || errorLogs
5356

5457
sleep 10
5558
testApp || appErrorLogs

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,14 @@ You can see these being used in the *examples/test* directory.
7676

7777
* *cpuLimit* and *memoryLimit* set limit values and take precedence over values taken from *cpu* and *memory* respectively
7878

79+
# Node Tolerations for SparkCluster pods
80+
81+
The operator supports specifying [Kubernetes node tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration)
82+
which will be applied to all master and worker pods in a Spark cluster.
83+
You can see examples of this in use in the *examples/test* directory.
84+
85+
* *nodeTolerations* specifies a list of node toleration definitions that should
86+
be applied to all master and worker pods.
7987

8088
## Spark Applications
8189

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
apiVersion: radanalytics.io/v1
2+
kind: SparkCluster
3+
metadata:
4+
name: spark-cluster-with-tolerations
5+
spec:
6+
nodeTolerations:
7+
- key: foo_key
8+
operator: Equal
9+
value: foo_value
10+
effect: NoExecute
11+
tolerationSeconds: 60
12+
worker:
13+
instances: "1"
14+
master:
15+
instances: "1"
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
apiVersion: v1
2+
kind: ConfigMap
3+
metadata:
4+
name: spark-cluster-with-tolerations
5+
labels:
6+
radanalytics.io/kind: SparkCluster
7+
data:
8+
config: |-
9+
nodeTolerations:
10+
- key: foo_key
11+
operator: Equal
12+
value: foo_value
13+
effect: NoExecute
14+
tolerationSeconds: 60
15+
worker:
16+
instances: "1"
17+
master:
18+
instances: "1"

src/main/java/io/radanalytics/operator/cluster/KubernetesSparkClusterDeployer.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,9 @@ private ReplicationController getRCforMasterOrWorker(boolean isMaster, SparkClus
166166
podLabels.put(prefix + LabelsHelper.OPERATOR_POD_TYPE_LABEL, isMaster ? OPERATOR_TYPE_MASTER_LABEL : OPERATOR_TYPE_WORKER_LABEL);
167167
addLabels(podLabels, cluster, isMaster);
168168

169+
// pod tolerations
170+
List<Toleration> tolerations = getTolerations(cluster, isMaster);
171+
169172
PodTemplateSpecFluent.SpecNested<ReplicationControllerSpecFluent.TemplateNested<ReplicationControllerFluent.SpecNested<ReplicationControllerBuilder>>> rcBuilder = new ReplicationControllerBuilder().withNewMetadata()
170173
.withName(podName).withLabels(labels)
171174
.endMetadata()
@@ -178,7 +181,7 @@ private ReplicationController getRCforMasterOrWorker(boolean isMaster, SparkClus
178181
)
179182
.withSelector(selector)
180183
.withNewTemplate().withNewMetadata().withLabels(podLabels).endMetadata()
181-
.withNewSpec().withContainers(containerBuilder.build());
184+
.withNewSpec().withTolerations(tolerations).withContainers(containerBuilder.build());
182185

183186
ReplicationController rc = rcBuilder.endSpec().endTemplate().endSpec().build();
184187

@@ -261,6 +264,16 @@ private void getLimitRequestValues(Boolean isMaster, Master m, Worker w, Map<Str
261264
Optional.ofNullable(isMaster ? m.getCpuRequest() : w.getCpuRequest()).ifPresent(cpuval -> requests.put("cpu", new Quantity(cpuval)));
262265
}
263266

267+
private List<Toleration> getTolerations(SparkCluster cluster, boolean isMaster) {
268+
List<Toleration> tolerations = new ArrayList<Toleration>();
269+
List<NodeToleration> nodeTolerations = cluster.getNodeTolerations();
270+
nodeTolerations.forEach(t -> {
271+
tolerations.add(new Toleration(t.getEffect(), t.getKey(), t.getOperator(), (long) t.getTolerationSeconds(), t.getValue()));
272+
});
273+
return tolerations;
274+
275+
}
276+
264277
private ContainerBuilder augmentContainerBuilder(SparkCluster cluster, ContainerBuilder builder, boolean isMaster) {
265278
Master m = null;
266279
Worker w = null;

src/main/resources/schema/sparkCluster.json

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,36 @@
9898
}
9999
}
100100
},
101+
"nodeTolerations": {
102+
"type": "array",
103+
"items": {
104+
"type": "object",
105+
"properties": {
106+
"key": {
107+
"type": "string"
108+
},
109+
"operator": {
110+
"type": "string"
111+
},
112+
"value": {
113+
"type": "string"
114+
},
115+
"effect": {
116+
"type": "string"
117+
},
118+
"tolerationSeconds": {
119+
"type": "integer",
120+
"default": null
121+
}
122+
},
123+
"required": [
124+
"key",
125+
"operator",
126+
"value",
127+
"effect"
128+
]
129+
}
130+
},
101131
"mavenDependencies": {
102132
"type": "array",
103133
"items": {

0 commit comments

Comments
 (0)