Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions .github/workflows/build_3.4.0.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Build and Test (3.4.0)"

on:
pull_request:
branches:
- 'master'
paths:
- '3.4.0/**'
- '.github/workflows/build_3.4.0.yaml'
- '.github/workflows/main.yml'

jobs:
run-build:
strategy:
matrix:
image-type: ["all", "python", "scala", "r"]
name: Run
secrets: inherit
uses: ./.github/workflows/main.yml
with:
spark: 3.4.0
scala: 2.12
java: 11
image-type: ${{ matrix.image-type }}
6 changes: 4 additions & 2 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@ on:
spark:
description: 'The Spark version of Spark image.'
required: true
default: '3.3.0'
default: '3.4.0'
type: choice
options:
- 3.3.0
- 3.4.0
- 3.3.2
- 3.3.1
- 3.3.0
publish:
description: 'Publish the image or not.'
default: false
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@ on:
spark:
description: 'The Spark version of Spark image.'
required: true
default: '3.3.1'
default: '3.4.0'
type: choice
options:
- 3.3.0
- 3.4.0
- 3.3.2
- 3.3.1
- 3.3.0
java:
description: 'The Java version of Spark image.'
default: 11
Expand Down
86 changes: 86 additions & 0 deletions 3.4.0/scala2.12-java11-python3-r-ubuntu/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM eclipse-temurin:11-jre-focal

ARG spark_uid=185

RUN groupadd --system --gid=${spark_uid} spark && \
useradd --system --uid=${spark_uid} --gid=spark spark

RUN set -ex && \
apt-get update && \
ln -s /lib /lib64 && \
apt install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu && \
apt install -y python3 python3-pip && \
apt install -y r-base r-base-dev && \
mkdir -p /opt/spark && \
mkdir /opt/spark/python && \
mkdir -p /opt/spark/examples && \
mkdir -p /opt/spark/work-dir && \
touch /opt/spark/RELEASE && \
chown -R spark:spark /opt/spark && \
rm /bin/sh && \
ln -sv /bin/bash /bin/sh && \
echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
rm -rf /var/cache/apt/* && \
rm -rf /var/lib/apt/lists/*

# Install Apache Spark
# https://downloads.apache.org/spark/KEYS
ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.4.0/spark-3.4.0-bin-hadoop3.tgz \
SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.4.0/spark-3.4.0-bin-hadoop3.tgz.asc \
GPG_KEY=CC68B3D16FE33A766705160BA7E57908C7A4E1B1

RUN set -ex; \
export SPARK_TMP="$(mktemp -d)"; \
cd $SPARK_TMP; \
wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \
export GNUPGHOME="$(mktemp -d)"; \
gpg --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \
gpg --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \
gpg --batch --verify spark.tgz.asc spark.tgz; \
gpgconf --kill all; \
rm -rf "$GNUPGHOME" spark.tgz.asc; \
\
tar -xf spark.tgz --strip-components=1; \
chown -R spark:spark .; \
mv jars /opt/spark/; \
mv bin /opt/spark/; \
mv sbin /opt/spark/; \
mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
mv examples /opt/spark/; \
mv kubernetes/tests /opt/spark/; \
mv data /opt/spark/; \
mv python/pyspark /opt/spark/python/pyspark/; \
mv python/lib /opt/spark/python/lib/; \
mv R /opt/spark/; \
cd ..; \
rm -rf "$SPARK_TMP";

COPY entrypoint.sh /opt/

ENV SPARK_HOME /opt/spark
ENV R_HOME /usr/lib/R

WORKDIR /opt/spark/work-dir
RUN chmod g+w /opt/spark/work-dir
RUN chmod a+x /opt/decom.sh
RUN chmod a+x /opt/entrypoint.sh

ENTRYPOINT [ "/opt/entrypoint.sh" ]
114 changes: 114 additions & 0 deletions 3.4.0/scala2.12-java11-python3-r-ubuntu/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Check whether there is a passwd entry for the container UID
myuid=$(id -u)
mygid=$(id -g)
# turn off -e for getent because it will return error code in anonymous uid case
set +e
uidentry=$(getent passwd $myuid)
set -e

# If there is no passwd entry for the container UID, attempt to create one
if [ -z "$uidentry" ] ; then
if [ -w /etc/passwd ] ; then
echo "$myuid:x:$myuid:$mygid:${SPARK_USER_NAME:-anonymous uid}:$SPARK_HOME:/bin/false" >> /etc/passwd
else
echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID"
fi
fi

if [ -z "$JAVA_HOME" ]; then
JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}')
fi

SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
env | grep SPARK_JAVA_OPT_ | sort -t_ -k4 -n | sed 's/[^=]*=\(.*\)/\1/g' > /tmp/java_opts.txt
readarray -t SPARK_EXECUTOR_JAVA_OPTS < /tmp/java_opts.txt

if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
fi

if ! [ -z ${PYSPARK_PYTHON+x} ]; then
export PYSPARK_PYTHON
fi
if ! [ -z ${PYSPARK_DRIVER_PYTHON+x} ]; then
export PYSPARK_DRIVER_PYTHON
fi

# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor.
# It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding customizations of this value from elsewhere e.g. Docker/K8s.
if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
fi

if ! [ -z ${HADOOP_CONF_DIR+x} ]; then
SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
fi

if ! [ -z ${SPARK_CONF_DIR+x} ]; then
SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
elif ! [ -z ${SPARK_HOME+x} ]; then
SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
fi

case "$1" in
driver)
shift 1
CMD=(
"$SPARK_HOME/bin/spark-submit"
--conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
--deploy-mode client
"$@"
)
;;
executor)
shift 1
CMD=(
${JAVA_HOME}/bin/java
"${SPARK_EXECUTOR_JAVA_OPTS[@]}"
-Xms$SPARK_EXECUTOR_MEMORY
-Xmx$SPARK_EXECUTOR_MEMORY
-cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH"
org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend
--driver-url $SPARK_DRIVER_URL
--executor-id $SPARK_EXECUTOR_ID
--cores $SPARK_EXECUTOR_CORES
--app-id $SPARK_APPLICATION_ID
--hostname $SPARK_EXECUTOR_POD_IP
--resourceProfileId $SPARK_RESOURCE_PROFILE_ID
--podName $SPARK_EXECUTOR_POD_NAME
)
;;

*)
# Non-spark-on-k8s command provided, proceeding in pass-through mode...
CMD=("$@")
;;
esac

# Switch to spark if no USER specified (root by default) otherwise use USER directly
switch_spark_if_root() {
if [ $(id -u) -eq 0 ]; then
echo gosu spark
fi
}

# Execute the container CMD under tini for better hygiene
exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
83 changes: 83 additions & 0 deletions 3.4.0/scala2.12-java11-python3-ubuntu/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM eclipse-temurin:11-jre-focal

ARG spark_uid=185

RUN groupadd --system --gid=${spark_uid} spark && \
useradd --system --uid=${spark_uid} --gid=spark spark

RUN set -ex && \
apt-get update && \
ln -s /lib /lib64 && \
apt install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu && \
apt install -y python3 python3-pip && \
mkdir -p /opt/spark && \
mkdir /opt/spark/python && \
mkdir -p /opt/spark/examples && \
mkdir -p /opt/spark/work-dir && \
touch /opt/spark/RELEASE && \
chown -R spark:spark /opt/spark && \
rm /bin/sh && \
ln -sv /bin/bash /bin/sh && \
echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
rm -rf /var/cache/apt/* && \
rm -rf /var/lib/apt/lists/*

# Install Apache Spark
# https://downloads.apache.org/spark/KEYS
ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.4.0/spark-3.4.0-bin-hadoop3.tgz \
SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.4.0/spark-3.4.0-bin-hadoop3.tgz.asc \
GPG_KEY=CC68B3D16FE33A766705160BA7E57908C7A4E1B1

RUN set -ex; \
export SPARK_TMP="$(mktemp -d)"; \
cd $SPARK_TMP; \
wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \
export GNUPGHOME="$(mktemp -d)"; \
gpg --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \
gpg --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \
gpg --batch --verify spark.tgz.asc spark.tgz; \
gpgconf --kill all; \
rm -rf "$GNUPGHOME" spark.tgz.asc; \
\
tar -xf spark.tgz --strip-components=1; \
chown -R spark:spark .; \
mv jars /opt/spark/; \
mv bin /opt/spark/; \
mv sbin /opt/spark/; \
mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
mv examples /opt/spark/; \
mv kubernetes/tests /opt/spark/; \
mv data /opt/spark/; \
mv python/pyspark /opt/spark/python/pyspark/; \
mv python/lib /opt/spark/python/lib/; \
cd ..; \
rm -rf "$SPARK_TMP";

COPY entrypoint.sh /opt/

ENV SPARK_HOME /opt/spark

WORKDIR /opt/spark/work-dir
RUN chmod g+w /opt/spark/work-dir
RUN chmod a+x /opt/decom.sh
RUN chmod a+x /opt/entrypoint.sh

ENTRYPOINT [ "/opt/entrypoint.sh" ]
Loading