Skip to content
This repository was archived by the owner on Dec 4, 2024. It is now read-only.

Commit 537b267

Browse files
authored
[DCOS-55706] Add parameter files for small-scale test and test. (#532)
1 parent 229bdf8 commit 537b267

File tree

2 files changed

+305
-0
lines changed

2 files changed

+305
-0
lines changed
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
# Depends on:
2+
# - TEST_NAME
3+
# - TEST_S3_BUCKET
4+
# - TEST_S3_FOLDER
5+
6+
# Notes:
7+
#
8+
# When this test was run (2019-06-26), soak114s had 61 agents in total:
9+
# - 49 private agents with 4 CPUs, 14GB MEM and 245GB disk
10+
# - 8 private agents with 8 CPUs, 30GB MEM and 1.4TB disk
11+
# - 1 private agent with 4 CPUs, 58GB MEM, 290GB disk and 1 GPU
12+
# - 3 public agents with 4 CPUs, 14GB MEM and 245GB disk
13+
14+
# Cluster configuration ########################################################
15+
# Total CPU quota: 50
16+
# Total MEM quota: 130000
17+
18+
CLUSTER_URL="https://soak114s.testing.mesosphe.re"
19+
SECURITY="strict"
20+
21+
# Test configuration ###########################################################
22+
23+
SHOULD_INSTALL_INFRASTRUCTURE=true
24+
SHOULD_INSTALL_NON_GPU_DISPATCHERS=true
25+
SHOULD_INSTALL_GPU_DISPATCHERS=true
26+
SHOULD_RUN_FAILING_STREAMING_JOBS=false
27+
SHOULD_RUN_FINITE_STREAMING_JOBS=true
28+
SHOULD_RUN_INFINITE_STREAMING_JOBS=true
29+
SHOULD_RUN_BATCH_JOBS=true
30+
SHOULD_RUN_GPU_BATCH_JOBS=false
31+
SHOULD_UNINSTALL_INFRASTRUCTURE_AT_THE_END=false
32+
33+
# Infrastructure configuration #################################################
34+
35+
SERVICE_NAMES_PREFIX="${TEST_NAME}/"
36+
INFRASTRUCTURE_OUTPUT_FILE="infrastructure.json"
37+
KAFKA_ZOOKEEPER_CONFIG='scale-tests/configs/kafka-zookeeper-options.json'
38+
KAFKA_CLUSTER_COUNT=1
39+
KAFKA_CONFIG='scale-tests/configs/kafka-options.json'
40+
CASSANDRA_CLUSTER_COUNT=1
41+
CASSANDRA_CONFIG='scale-tests/configs/cassandra-options.json'
42+
43+
# Spark configuration ##########################################################
44+
45+
# Note: leaving the Spark executor Docker image empty so that
46+
# executors inherit the image used for dispatchers.
47+
SPARK_EXECUTOR_DOCKER_IMAGE=
48+
49+
# Spark 2.8.0-2.4.0 from the Universe.
50+
SPARK_PACKAGE_REPO=
51+
52+
# Non-GPU dispatchers configuration ############################################
53+
# CPU quota: 48
54+
# MEM quota: 120000
55+
56+
NON_GPU_NUM_DISPATCHERS=3
57+
NON_GPU_DISPATCHERS_OUTPUT_FILE="non-gpu-dispatchers.out"
58+
NON_GPU_DISPATCHERS_JSON_OUTPUT_FILE="${NON_GPU_DISPATCHERS_OUTPUT_FILE}-dispatchers.json" # NOTE: this name is built internally by the deploy-dispatchers.py script.
59+
NON_GPU_QUOTA_DRIVERS_CPUS=8
60+
NON_GPU_QUOTA_DRIVERS_MEM=20000
61+
NON_GPU_QUOTA_EXECUTORS_CPUS=8
62+
NON_GPU_QUOTA_EXECUTORS_MEM=20000
63+
64+
# GPU dispatchers configuration ################################################
65+
# CPU quota: 2
66+
# MEM quota: 10000
67+
68+
GPU_NUM_DISPATCHERS=1
69+
GPU_DISPATCHERS_OUTPUT_FILE="gpu-dispatchers.out"
70+
GPU_DISPATCHERS_JSON_OUTPUT_FILE="${GPU_DISPATCHERS_OUTPUT_FILE}-dispatchers.json" # NOTE: this name is built internally by the deploy-dispatchers.py script.
71+
GPU_QUOTA_DRIVERS_CPUS=2
72+
GPU_QUOTA_DRIVERS_MEM=10000
73+
GPU_QUOTA_DRIVERS_GPUS=
74+
GPU_QUOTA_EXECUTORS_CPUS=
75+
GPU_QUOTA_EXECUTORS_MEM=
76+
GPU_QUOTA_EXECUTORS_GPUS=
77+
# NOTE: to test Core team’s hypothesis of GPU quota negatively impacting spark
78+
# launch rates, we remove the GPU quota entirely from all executor roles.
79+
GPU_REMOVE_EXECUTORS_ROLES_QUOTAS=true
80+
81+
# Common streaming jobs configuration ##########################################
82+
83+
TEST_ASSEMBLY_JAR_URL='http://infinity-artifacts.s3.amazonaws.com/scale-tests/dcos-spark-scala-tests-assembly-2.4.0-20190325.jar' # NOTE(review): fetched over plain http; consider https if the bucket serves it — confirm.
84+
NUM_DISPATCHERS="$((${NON_GPU_NUM_DISPATCHERS} + ${GPU_NUM_DISPATCHERS}))" # 3 non-GPU + 1 GPU = 4 dispatchers.
85+
DISPATCHERS_JSON_OUTPUT_FILE="all-dispatchers.json"
86+
87+
# Failing streaming jobs configuration #########################################
88+
89+
FAILING_SUBMISSIONS_OUTPUT_FILE="failing-submissions.out"
90+
FAILING_NUM_PRODUCERS_PER_KAFKA="${NON_GPU_NUM_DISPATCHERS}"
91+
FAILING_NUM_CONSUMERS_PER_PRODUCER=1
92+
FAILING_PRODUCER_NUMBER_OF_WORDS=7692
93+
FAILING_PRODUCER_WORDS_PER_SECOND=1
94+
FAILING_PRODUCER_SPARK_CORES_MAX=2
95+
FAILING_PRODUCER_SPARK_EXECUTOR_CORES=2
96+
FAILING_CONSUMER_BATCH_SIZE_SECONDS=10
97+
FAILING_CONSUMER_SPARK_CORES_MAX=1
98+
FAILING_CONSUMER_SPARK_EXECUTOR_CORES=1
99+
100+
# Finite streaming jobs configuration ##########################################
101+
102+
FINITE_SUBMISSIONS_OUTPUT_FILE="finite-submissions.out"
103+
FINITE_NUM_PRODUCERS_PER_KAFKA="${NON_GPU_NUM_DISPATCHERS}" # 1 Kafka and 3 dispatchers -> 3 producers.
104+
FINITE_NUM_CONSUMERS_PER_PRODUCER=1 # 3 producers -> 3 consumers.
105+
FINITE_PRODUCER_NUMBER_OF_WORDS=7692
106+
FINITE_PRODUCER_WORDS_PER_SECOND=1 # 7692 words / 1 word per second -> ~2h runtime.
107+
FINITE_PRODUCER_SPARK_CORES_MAX=2
108+
FINITE_PRODUCER_SPARK_EXECUTOR_CORES=2
109+
FINITE_CONSUMER_BATCH_SIZE_SECONDS=10
110+
FINITE_CONSUMER_SPARK_CORES_MAX=1
111+
FINITE_CONSUMER_SPARK_EXECUTOR_CORES=1
112+
113+
# Infinite streaming jobs configuration ########################################
114+
115+
INFINITE_SUBMISSIONS_OUTPUT_FILE="infinite-submissions.out"
116+
INFINITE_NUM_PRODUCERS_PER_KAFKA="${NON_GPU_NUM_DISPATCHERS}" # 1 Kafka and 3 dispatchers -> 3 producers.
117+
INFINITE_NUM_CONSUMERS_PER_PRODUCER=1 # 3 producers -> 3 consumers.
118+
INFINITE_PRODUCER_NUMBER_OF_WORDS=0 # NOTE(review): 0 presumably means "no word limit" (producer runs until stopped) — confirm against the producer job.
119+
INFINITE_PRODUCER_WORDS_PER_SECOND=1
120+
INFINITE_PRODUCER_SPARK_CORES_MAX=2
121+
INFINITE_PRODUCER_SPARK_EXECUTOR_CORES=2
122+
INFINITE_CONSUMER_BATCH_SIZE_SECONDS=10
123+
INFINITE_CONSUMER_SPARK_CORES_MAX=1
124+
INFINITE_CONSUMER_SPARK_EXECUTOR_CORES=1
125+
126+
# Batch jobs configuration #####################################################
127+
128+
NON_GPU_DISPATCHERS_JSON_OUTPUT_FILE_URL="https://${TEST_S3_BUCKET}.s3.amazonaws.com/${TEST_S3_FOLDER}/${NON_GPU_DISPATCHERS_JSON_OUTPUT_FILE}"
129+
130+
BATCH_APP_ID="/${SERVICE_NAMES_PREFIX}batch-workload"
131+
BATCH_SCRIPT_CPUS=2
132+
BATCH_SCRIPT_MEM=8192
133+
BATCH_SUBMITS_PER_MIN=1
134+
BATCH_SPARK_BUILD_BRANCH=master
135+
136+
# Batch GPU jobs configuration #################################################
137+
138+
GPU_DISPATCHERS_JSON_OUTPUT_FILE_URL="https://${TEST_S3_BUCKET}.s3.amazonaws.com/${TEST_S3_FOLDER}/${GPU_DISPATCHERS_JSON_OUTPUT_FILE}"
139+
140+
GPU_APP_ID="/${SERVICE_NAMES_PREFIX}gpu-batch-workload"
141+
GPU_SCRIPT_CPUS=2
142+
GPU_SCRIPT_MEM=4096
143+
GPU_DOCKER_IMAGE='samvantran/spark-dcos-gpu:metrics'
144+
GPU_SUBMITS_PER_MIN=1
145+
GPU_MAX_NUM_DISPATCHERS=${GPU_NUM_DISPATCHERS}
146+
GPU_SPARK_CORES_MAX=2
147+
GPU_SPARK_MESOS_EXECUTOR_GPUS=1
148+
GPU_SPARK_MESOS_MAX_GPUS=1
149+
GPU_SPARK_BUILD_BRANCH=master
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
# Depends on:
2+
# - TEST_NAME
3+
# - TEST_S3_BUCKET
4+
# - TEST_S3_FOLDER
5+
6+
# Cluster configuration ########################################################
7+
# Total CPU quota: 2330
8+
# Total MEM quota: 5500000
9+
10+
CLUSTER_URL="https://mwst17.scaletesting.mesosphe.re"
11+
SECURITY="strict"
12+
13+
# Test configuration ###########################################################
14+
15+
SHOULD_INSTALL_INFRASTRUCTURE=true
16+
SHOULD_INSTALL_NON_GPU_DISPATCHERS=true
17+
SHOULD_INSTALL_GPU_DISPATCHERS=true
18+
SHOULD_RUN_FAILING_STREAMING_JOBS=false
19+
SHOULD_RUN_FINITE_STREAMING_JOBS=true
20+
SHOULD_RUN_INFINITE_STREAMING_JOBS=true
21+
SHOULD_RUN_BATCH_JOBS=true
22+
SHOULD_RUN_GPU_BATCH_JOBS=true
23+
SHOULD_UNINSTALL_INFRASTRUCTURE_AT_THE_END=false
24+
25+
# Infrastructure configuration #################################################
26+
27+
SERVICE_NAMES_PREFIX="${TEST_NAME}/"
28+
INFRASTRUCTURE_OUTPUT_FILE="infrastructure.json"
29+
KAFKA_ZOOKEEPER_CONFIG='scale-tests/configs/kafka-zookeeper-options.json'
30+
KAFKA_CLUSTER_COUNT=1
31+
KAFKA_CONFIG='scale-tests/configs/kafka-options.json'
32+
CASSANDRA_CLUSTER_COUNT=1
33+
CASSANDRA_CONFIG='scale-tests/configs/cassandra-options.json'
34+
35+
# Spark configuration ##########################################################
36+
37+
# Note: leaving the Spark executor Docker image empty so that
38+
# executors inherit the image used for dispatchers.
39+
SPARK_EXECUTOR_DOCKER_IMAGE=
40+
41+
# Spark 2.8.0-2.4.0 from the Universe.
42+
SPARK_PACKAGE_REPO=
43+
44+
# Non-GPU dispatchers configuration ############################################
45+
46+
NON_GPU_NUM_DISPATCHERS=50
47+
NON_GPU_DISPATCHERS_OUTPUT_FILE="non-gpu-dispatchers.out"
48+
NON_GPU_DISPATCHERS_JSON_OUTPUT_FILE="${NON_GPU_DISPATCHERS_OUTPUT_FILE}-dispatchers.json" # NOTE: this name is built internally by the deploy-dispatchers.py script.
49+
NON_GPU_QUOTA_DRIVERS_CPUS=20
50+
NON_GPU_QUOTA_DRIVERS_MEM=50000
51+
NON_GPU_QUOTA_EXECUTORS_CPUS=25
52+
NON_GPU_QUOTA_EXECUTORS_MEM=40000
53+
# Total quota:
54+
# - driver:
55+
# - 1000 cpus
56+
# - 2500000 mem
57+
# - executor:
58+
# - 1250 cpus
59+
# - 2000000 mem
60+
61+
# GPU dispatchers configuration ################################################
62+
63+
GPU_NUM_DISPATCHERS=5
64+
GPU_DISPATCHERS_OUTPUT_FILE="gpu-dispatchers.out"
65+
GPU_DISPATCHERS_JSON_OUTPUT_FILE="${GPU_DISPATCHERS_OUTPUT_FILE}-dispatchers.json" # NOTE: this name is built internally by the deploy-dispatchers.py script.
66+
GPU_QUOTA_DRIVERS_CPUS=16
67+
GPU_QUOTA_DRIVERS_MEM=200000
68+
GPU_QUOTA_DRIVERS_GPUS=
69+
GPU_QUOTA_EXECUTORS_CPUS=
70+
GPU_QUOTA_EXECUTORS_MEM=
71+
GPU_QUOTA_EXECUTORS_GPUS=
72+
# NOTE: to test Core team’s hypothesis of GPU quota negatively impacting spark
73+
# launch rates, we remove the GPU quota entirely from all executor roles.
74+
GPU_REMOVE_EXECUTORS_ROLES_QUOTAS=true
75+
# Total quota:
76+
# - driver:
77+
# - 80 cpus
78+
# - 1000000 mem
79+
# - 0 gpus
80+
# - executor:
81+
# - 0 cpus
82+
# - 0 mem
83+
# - 0 gpus
84+
85+
# Common streaming jobs configuration ##########################################
86+
87+
TEST_ASSEMBLY_JAR_URL='http://infinity-artifacts.s3.amazonaws.com/scale-tests/dcos-spark-scala-tests-assembly-2.4.0-20190325.jar' # NOTE(review): fetched over plain http; consider https if the bucket serves it — confirm.
88+
NUM_DISPATCHERS="$((${NON_GPU_NUM_DISPATCHERS} + ${GPU_NUM_DISPATCHERS}))" # 50 non-GPU + 5 GPU = 55 dispatchers.
89+
DISPATCHERS_JSON_OUTPUT_FILE="all-dispatchers.json"
90+
91+
# Failing streaming jobs configuration #########################################
92+
93+
FAILING_SUBMISSIONS_OUTPUT_FILE="failing-submissions.out"
94+
FAILING_NUM_PRODUCERS_PER_KAFKA="${NON_GPU_NUM_DISPATCHERS}"
95+
FAILING_NUM_CONSUMERS_PER_PRODUCER=1
96+
FAILING_PRODUCER_NUMBER_OF_WORDS=7692
97+
FAILING_PRODUCER_WORDS_PER_SECOND=1
98+
FAILING_PRODUCER_SPARK_CORES_MAX=2
99+
FAILING_PRODUCER_SPARK_EXECUTOR_CORES=2
100+
FAILING_CONSUMER_BATCH_SIZE_SECONDS=10
101+
FAILING_CONSUMER_SPARK_CORES_MAX=1
102+
FAILING_CONSUMER_SPARK_EXECUTOR_CORES=1
103+
104+
# Finite streaming jobs configuration ##########################################
105+
106+
FINITE_SUBMISSIONS_OUTPUT_FILE="finite-submissions.out"
107+
FINITE_NUM_PRODUCERS_PER_KAFKA="${NON_GPU_NUM_DISPATCHERS}" # 1 Kafka and 50 dispatchers -> 50 producers.
108+
FINITE_NUM_CONSUMERS_PER_PRODUCER=1 # 50 producers -> 50 consumers.
109+
# 50 producers + 50 consumers = 100 total finite streaming jobs
110+
FINITE_PRODUCER_NUMBER_OF_WORDS=7692
111+
FINITE_PRODUCER_WORDS_PER_SECOND=1
112+
# 7692 words / 1 word per second -> ~2h runtime.
113+
FINITE_PRODUCER_SPARK_CORES_MAX=2
114+
FINITE_PRODUCER_SPARK_EXECUTOR_CORES=2
115+
FINITE_CONSUMER_BATCH_SIZE_SECONDS=10
116+
FINITE_CONSUMER_SPARK_CORES_MAX=1
117+
FINITE_CONSUMER_SPARK_EXECUTOR_CORES=1
118+
119+
# Infinite streaming jobs configuration ########################################
120+
121+
INFINITE_SUBMISSIONS_OUTPUT_FILE="infinite-submissions.out"
122+
INFINITE_NUM_PRODUCERS_PER_KAFKA="${NON_GPU_NUM_DISPATCHERS}" # 1 Kafka and 50 dispatchers -> 50 producers.
123+
INFINITE_NUM_CONSUMERS_PER_PRODUCER=1 # 50 producers -> 50 consumers.
124+
# 50 producers + 50 consumers = 100 total infinite streaming jobs
125+
INFINITE_PRODUCER_NUMBER_OF_WORDS=0 # NOTE(review): 0 presumably means "no word limit" (producer runs until stopped) — confirm against the producer job.
126+
INFINITE_PRODUCER_WORDS_PER_SECOND=1
127+
INFINITE_PRODUCER_SPARK_CORES_MAX=2
128+
INFINITE_PRODUCER_SPARK_EXECUTOR_CORES=2
129+
INFINITE_CONSUMER_BATCH_SIZE_SECONDS=10
130+
INFINITE_CONSUMER_SPARK_CORES_MAX=1
131+
INFINITE_CONSUMER_SPARK_EXECUTOR_CORES=1
132+
133+
# Batch jobs configuration #####################################################
134+
135+
NON_GPU_DISPATCHERS_JSON_OUTPUT_FILE_URL="https://${TEST_S3_BUCKET}.s3.amazonaws.com/${TEST_S3_FOLDER}/${NON_GPU_DISPATCHERS_JSON_OUTPUT_FILE}"
136+
137+
BATCH_APP_ID="/${SERVICE_NAMES_PREFIX}batch-workload"
138+
BATCH_SCRIPT_CPUS=6
139+
BATCH_SCRIPT_MEM=12288
140+
BATCH_SUBMITS_PER_MIN=13
141+
BATCH_SPARK_BUILD_BRANCH=master
142+
143+
# Batch GPU jobs configuration #################################################
144+
145+
GPU_DISPATCHERS_JSON_OUTPUT_FILE_URL="https://${TEST_S3_BUCKET}.s3.amazonaws.com/${TEST_S3_FOLDER}/${GPU_DISPATCHERS_JSON_OUTPUT_FILE}"
146+
147+
GPU_APP_ID="/${SERVICE_NAMES_PREFIX}gpu-batch-workload"
148+
GPU_SCRIPT_CPUS=2
149+
GPU_SCRIPT_MEM=4096
150+
GPU_DOCKER_IMAGE='samvantran/spark-dcos-gpu:metrics'
151+
GPU_SUBMITS_PER_MIN=5
152+
GPU_MAX_NUM_DISPATCHERS=${GPU_NUM_DISPATCHERS}
153+
GPU_SPARK_CORES_MAX=4
154+
GPU_SPARK_MESOS_EXECUTOR_GPUS=4
155+
GPU_SPARK_MESOS_MAX_GPUS=4
156+
GPU_SPARK_BUILD_BRANCH=master

0 commit comments

Comments
 (0)